Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/IPO')
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/AlwaysInliner.cpp  12
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp  44
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp  77
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp  631
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp  44
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp  43
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/ExtractGV.cpp  29
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp  440
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp  149
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp  332
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalDCE.cpp  14
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp  368
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalSplit.cpp  5
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp  977
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp  262
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/Internalize.cpp  30
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/LoopExtractor.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp  79
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/ModuleInliner.cpp  354
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp  684
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp  10
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp  7
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/SCCP.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp  164
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp  226
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp  15
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp  17
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp  21
32 files changed, 3598 insertions, 1470 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 532599b42e0d..01e724e22dcf 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -73,8 +73,8 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
},
ORE);
assert(OIC);
- emitInlinedInto(ORE, CB->getDebugLoc(), CB->getParent(), F, *Caller,
- *OIC, false, DEBUG_TYPE);
+ emitInlinedIntoBasedOnCost(ORE, CB->getDebugLoc(), CB->getParent(), F,
+ *Caller, *OIC, false, DEBUG_TYPE);
InlineFunctionInfo IFI(
/*cg=*/nullptr, GetAssumptionCache, &PSI,
@@ -108,8 +108,10 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
// Delete the non-comdat ones from the module and also from our vector.
auto NonComdatBegin = partition(
InlinedFunctions, [&](Function *F) { return F->hasComdat(); });
- for (Function *F : make_range(NonComdatBegin, InlinedFunctions.end()))
+ for (Function *F : make_range(NonComdatBegin, InlinedFunctions.end())) {
M.getFunctionList().erase(F);
+ Changed = true;
+ }
InlinedFunctions.erase(NonComdatBegin, InlinedFunctions.end());
if (!InlinedFunctions.empty()) {
@@ -117,8 +119,10 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
// are not actually dead.
filterDeadComdatFunctions(M, InlinedFunctions);
// The remaining functions are actually dead.
- for (Function *F : InlinedFunctions)
+ for (Function *F : InlinedFunctions) {
M.getFunctionList().erase(F);
+ Changed = true;
+ }
}
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
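[Editor's note] The two hunks above fix a reporting bug in AlwaysInliner: the pass erased fully-inlined functions but never set Changed, so it could claim PreservedAnalyses::all() after mutating the module. A minimal sketch of the reporting pattern under the new pass manager; the pass name and pruning condition below are illustrative, not taken from the patch:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/PassManager.h"
    using namespace llvm;

    // Hypothetical pass: erase unreferenced local functions and only report
    // "nothing preserved" when the module was actually mutated.
    struct PruneDeadLocalsPass : PassInfoMixin<PruneDeadLocalsPass> {
      PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
        bool Changed = false;
        for (Function &F : make_early_inc_range(M)) {
          if (!F.isDeclaration() && F.hasLocalLinkage() && F.use_empty()) {
            F.eraseFromParent();
            Changed = true; // the detail the patch above adds to AlwaysInliner
          }
        }
        return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
      }
    };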
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index f670a101767e..93bb11433775 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -148,7 +148,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
} else if (!ArgsToPromote.count(&*I)) {
// Unchanged argument
Params.push_back(I->getType());
- ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo));
+ ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
} else if (I->use_empty()) {
// Dead argument (which are always marked as promotable)
++NumArgumentsDead;
@@ -177,9 +177,8 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Since loads will only have a single operand, and GEPs only a single
// non-index operand, this will record direct loads without any indices,
// and gep+loads with the GEP indices.
- for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
- II != IE; ++II)
- Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
+ for (const Use &I : llvm::drop_begin(UI->operands()))
+ Indices.push_back(cast<ConstantInt>(I)->getSExtValue());
// GEPs with a single 0 index can be merged with direct loads
if (Indices.size() == 1 && Indices.front() == 0)
Indices.clear();
@@ -231,8 +230,8 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Recompute the parameter attributes list based on the new arguments for
// the function.
- NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttributes(),
- PAL.getRetAttributes(), ArgAttrVec));
+ NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
+ PAL.getRetAttrs(), ArgAttrVec));
ArgAttrVec.clear();
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
@@ -257,7 +256,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
++I, ++AI, ++ArgNo)
if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
Args.push_back(*AI); // Unmodified argument
- ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
+ ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
} else if (ByValArgsToTransform.count(&*I)) {
// Emit a GEP and load for each element of the struct.
Type *AgTy = I->getParamByValType();
@@ -313,9 +312,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val");
newLoad->setAlignment(OrigLoad->getAlign());
// Transfer the AA info too.
- AAMDNodes AAInfo;
- OrigLoad->getAAMetadata(AAInfo);
- newLoad->setAAMetadata(AAInfo);
+ newLoad->setAAMetadata(OrigLoad->getAAMetadata());
Args.push_back(newLoad);
ArgAttrVec.push_back(AttributeSet());
@@ -325,7 +322,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Push any varargs arguments on the list.
for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
Args.push_back(*AI);
- ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
+ ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
}
SmallVector<OperandBundleDef, 1> OpBundles;
@@ -341,9 +338,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
NewCS = NewCall;
}
NewCS->setCallingConv(CB.getCallingConv());
- NewCS->setAttributes(
- AttributeList::get(F->getContext(), CallPAL.getFnAttributes(),
- CallPAL.getRetAttributes(), ArgAttrVec));
+ NewCS->setAttributes(AttributeList::get(F->getContext(),
+ CallPAL.getFnAttrs(),
+ CallPAL.getRetAttrs(), ArgAttrVec));
NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
Args.clear();
ArgAttrVec.clear();
@@ -1018,11 +1015,12 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
do {
LocalChange = false;
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
+
for (LazyCallGraph::Node &N : C) {
Function &OldF = N.getFunction();
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
// FIXME: This lambda must only be used with this function. We should
// skip the lambda and just get the AA results directly.
auto AARGetter = [&](Function &F) -> AAResults & {
@@ -1045,6 +1043,13 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
FAM.clear(OldF, OldF.getName());
OldF.eraseFromParent();
+
+ PreservedAnalyses FuncPA;
+ FuncPA.preserveSet<CFGAnalyses>();
+ for (auto *U : NewF->users()) {
+ auto *UserF = cast<CallBase>(U)->getFunction();
+ FAM.invalidate(*UserF, FuncPA);
+ }
}
Changed |= LocalChange;
@@ -1053,7 +1058,12 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
if (!Changed)
return PreservedAnalyses::all();
- return PreservedAnalyses::none();
+ PreservedAnalyses PA;
+ // We've cleared out analyses for deleted functions.
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ // We've manually invalidated analyses for functions we've modified.
+ PA.preserveSet<AllAnalysesOn<Function>>();
+ return PA;
}
namespace {
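[Editor's note] Most of the churn in ArgumentPromotion is mechanical: the deprecated AttributeList accessors getParamAttributes/getFnAttributes/getRetAttributes become getParamAttrs/getFnAttrs/getRetAttrs, and run() now invalidates caller analyses by hand so it can preserve the rest. A small sketch of rebuilding an attribute list with the renamed accessors; dropParamAttrs and DeadArgNo are illustrative names, not from the patch:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Rebuild F's attribute list without the attributes of one parameter,
    // using the renamed per-position accessors.
    static AttributeList dropParamAttrs(const Function &F, unsigned DeadArgNo) {
      AttributeList PAL = F.getAttributes();
      SmallVector<AttributeSet, 8> ArgAttrs;
      for (unsigned I = 0, E = F.arg_size(); I != E; ++I)
        if (I != DeadArgNo)
          ArgAttrs.push_back(PAL.getParamAttrs(I));
      return AttributeList::get(F.getContext(), PAL.getFnAttrs(),
                                PAL.getRetAttrs(), ArgAttrs);
    }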
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
index 91b16ec66ee3..edadc79e3a9f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -382,30 +382,30 @@ static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
if (Attr.isEnumAttribute()) {
Attribute::AttrKind Kind = Attr.getKindAsEnum();
- if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (Attrs.hasAttributeAtIndex(AttrIdx, Kind))
if (!ForceReplace &&
- isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ isEqualOrWorse(Attr, Attrs.getAttributeAtIndex(AttrIdx, Kind)))
return false;
- Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ Attrs = Attrs.addAttributeAtIndex(Ctx, AttrIdx, Attr);
return true;
}
if (Attr.isStringAttribute()) {
StringRef Kind = Attr.getKindAsString();
- if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (Attrs.hasAttributeAtIndex(AttrIdx, Kind))
if (!ForceReplace &&
- isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ isEqualOrWorse(Attr, Attrs.getAttributeAtIndex(AttrIdx, Kind)))
return false;
- Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ Attrs = Attrs.addAttributeAtIndex(Ctx, AttrIdx, Attr);
return true;
}
if (Attr.isIntAttribute()) {
Attribute::AttrKind Kind = Attr.getKindAsEnum();
- if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (Attrs.hasAttributeAtIndex(AttrIdx, Kind))
if (!ForceReplace &&
- isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ isEqualOrWorse(Attr, Attrs.getAttributeAtIndex(AttrIdx, Kind)))
return false;
- Attrs = Attrs.removeAttribute(Ctx, AttrIdx, Kind);
- Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ Attrs = Attrs.removeAttributeAtIndex(Ctx, AttrIdx, Kind);
+ Attrs = Attrs.addAttributeAtIndex(Ctx, AttrIdx, Attr);
return true;
}
@@ -658,9 +658,9 @@ bool IRPosition::getAttrsFromIRAttr(Attribute::AttrKind AK,
else
AttrList = getAssociatedFunction()->getAttributes();
- bool HasAttr = AttrList.hasAttribute(getAttrIdx(), AK);
+ bool HasAttr = AttrList.hasAttributeAtIndex(getAttrIdx(), AK);
if (HasAttr)
- Attrs.push_back(AttrList.getAttribute(getAttrIdx(), AK));
+ Attrs.push_back(AttrList.getAttributeAtIndex(getAttrIdx(), AK));
return HasAttr;
}
@@ -1043,6 +1043,8 @@ bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
if (auto *SI = dyn_cast<StoreInst>(U->getUser())) {
if (&SI->getOperandUse(0) == U) {
+ if (!Visited.insert(U).second)
+ continue;
SmallSetVector<Value *, 4> PotentialCopies;
if (AA::getPotentialCopiesOfStoredValue(*this, *SI, PotentialCopies,
QueryingAA,
@@ -1121,6 +1123,10 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
if (CE->isCast() && CE->getType()->isPointerTy() &&
CE->getType()->getPointerElementType()->isFunctionTy()) {
+ LLVM_DEBUG(
+ dbgs() << "[Attributor] Use, is constant cast expression, add "
+ << CE->getNumUses()
+ << " uses of that expression instead!\n");
for (const Use &CEU : CE->uses())
Uses.push_back(&CEU);
continue;
@@ -1141,9 +1147,13 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
const Use *EffectiveUse =
ACS.isCallbackCall() ? &ACS.getCalleeUseForCallback() : &U;
if (!ACS.isCallee(EffectiveUse)) {
- if (!RequireAllCallSites)
+ if (!RequireAllCallSites) {
+ LLVM_DEBUG(dbgs() << "[Attributor] User " << *EffectiveUse->getUser()
+ << " is not a call of " << Fn.getName()
+ << ", skip use\n");
continue;
- LLVM_DEBUG(dbgs() << "[Attributor] User " << EffectiveUse->getUser()
+ }
+ LLVM_DEBUG(dbgs() << "[Attributor] User " << *EffectiveUse->getUser()
<< " is an invalid use of " << Fn.getName() << "\n");
return false;
}
@@ -1413,6 +1423,16 @@ void Attributor::runTillFixpoint() {
} while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations ||
VerifyMaxFixpointIterations));
+ if (IterationCounter > MaxFixedPointIterations && !Worklist.empty()) {
+ auto Remark = [&](OptimizationRemarkMissed ORM) {
+ return ORM << "Attributor did not reach a fixpoint after "
+ << ore::NV("Iterations", MaxFixedPointIterations)
+ << " iterations.";
+ };
+ Function *F = Worklist.front()->getIRPosition().getAssociatedFunction();
+ emitRemark<OptimizationRemarkMissed>(F, "FixedPoint", Remark);
+ }
+
LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: "
<< IterationCounter << "/" << MaxFixpointIterations
<< " iterations\n");
@@ -1922,7 +1942,7 @@ void Attributor::createShallowWrapper(Function &F) {
CallInst *CI = CallInst::Create(&F, Args, "", EntryBB);
CI->setTailCall(true);
- CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoInline);
+ CI->addFnAttr(Attribute::NoInline);
ReturnInst::Create(Ctx, CI->getType()->isVoidTy() ? nullptr : CI, EntryBB);
NumFnShallowWrappersCreated++;
@@ -2015,7 +2035,8 @@ bool Attributor::isValidFunctionSignatureRewrite(
if (!RewriteSignatures)
return false;
- auto CallSiteCanBeChanged = [](AbstractCallSite ACS) {
+ Function *Fn = Arg.getParent();
+ auto CallSiteCanBeChanged = [Fn](AbstractCallSite ACS) {
// Forbid the call site to cast the function return type. If we need to
// rewrite these functions we need to re-create a cast for the new call site
// (if the old had uses).
@@ -2023,11 +2044,12 @@ bool Attributor::isValidFunctionSignatureRewrite(
ACS.getInstruction()->getType() !=
ACS.getCalledFunction()->getReturnType())
return false;
+ if (ACS.getCalledOperand()->getType() != Fn->getType())
+ return false;
// Forbid must-tail calls for now.
return !ACS.isCallbackCall() && !ACS.getInstruction()->isMustTailCall();
};
- Function *Fn = Arg.getParent();
// Avoid var-arg functions for now.
if (Fn->isVarArg()) {
LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite var-args functions\n");
@@ -2157,7 +2179,7 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
} else {
NewArgumentTypes.push_back(Arg.getType());
NewArgumentAttributes.push_back(
- OldFnAttributeList.getParamAttributes(Arg.getArgNo()));
+ OldFnAttributeList.getParamAttrs(Arg.getArgNo()));
}
}
@@ -2188,8 +2210,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
// the function.
LLVMContext &Ctx = OldFn->getContext();
NewFn->setAttributes(AttributeList::get(
- Ctx, OldFnAttributeList.getFnAttributes(),
- OldFnAttributeList.getRetAttributes(), NewArgumentAttributes));
+ Ctx, OldFnAttributeList.getFnAttrs(), OldFnAttributeList.getRetAttrs(),
+ NewArgumentAttributes));
// Since we have now created the new function, splice the body of the old
// function right into the new function, leaving the old rotting hulk of the
@@ -2234,7 +2256,7 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
} else {
NewArgOperands.push_back(ACS.getCallArgOperand(OldArgNum));
NewArgOperandAttributes.push_back(
- OldCallAttributeList.getParamAttributes(OldArgNum));
+ OldCallAttributeList.getParamAttrs(OldArgNum));
}
}
@@ -2264,8 +2286,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
NewCB->setCallingConv(OldCB->getCallingConv());
NewCB->takeName(OldCB);
NewCB->setAttributes(AttributeList::get(
- Ctx, OldCallAttributeList.getFnAttributes(),
- OldCallAttributeList.getRetAttributes(), NewArgOperandAttributes));
+ Ctx, OldCallAttributeList.getFnAttrs(),
+ OldCallAttributeList.getRetAttrs(), NewArgOperandAttributes));
CallSitePairs.push_back({OldCB, NewCB});
return true;
@@ -2480,6 +2502,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every function can be "readnone/argmemonly/inaccessiblememonly/...".
getOrCreateAAFor<AAMemoryLocation>(FPos);
+ // Every function can track active assumptions.
+ getOrCreateAAFor<AAAssumptionInfo>(FPos);
+
// Every function might be applicable for Heap-To-Stack conversion.
if (EnableHeapToStack)
getOrCreateAAFor<AAHeapToStack>(FPos);
@@ -2565,6 +2590,7 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
auto CallSitePred = [&](Instruction &I) -> bool {
auto &CB = cast<CallBase>(I);
IRPosition CBRetPos = IRPosition::callsite_returned(CB);
+ IRPosition CBFnPos = IRPosition::callsite_function(CB);
// Call sites might be dead if they do not have side effects and no live
// users. The return value might be dead if there are no live users.
@@ -2576,6 +2602,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
if (!Callee)
return true;
+ // Every call site can track active assumptions.
+ getOrCreateAAFor<AAAssumptionInfo>(CBFnPos);
+
// Skip declarations except if annotations on their call sites were
// explicitly requested.
if (!AnnotateDeclarationCallSites && Callee->isDeclaration() &&
@@ -2588,7 +2617,7 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
getOrCreateAAFor<AAValueSimplify>(CBRetPos);
}
- for (int I = 0, E = CB.getNumArgOperands(); I < E; ++I) {
+ for (int I = 0, E = CB.arg_size(); I < E; ++I) {
IRPosition CBArgPos = IRPosition::callsite_argument(CB, I);
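[Editor's note] Besides the *AtIndex renames on AttributeList, the Attributor changes above migrate two call-site APIs: addAttribute(AttributeList::FunctionIndex, ...) becomes the position-specific addFnAttr, and getNumArgOperands() becomes arg_size(). A sketch showing both new spellings; emitNoInlineCall and countPointerArgs are illustrative helpers, not from the patch:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Create a forwarding call and mark it noinline with the
    // position-specific helper, as the shallow-wrapper hunk now does.
    static CallInst *emitNoInlineCall(IRBuilder<> &IRB, Function &Callee,
                                      ArrayRef<Value *> Args) {
      CallInst *CI = IRB.CreateCall(&Callee, Args);
      CI->setTailCall(true);
      CI->addFnAttr(Attribute::NoInline); // was addAttribute(FunctionIndex, ...)
      return CI;
    }

    // Walk call-site arguments with arg_size(), the replacement for the
    // deprecated getNumArgOperands().
    static unsigned countPointerArgs(const CallBase &CB) {
      unsigned N = 0;
      for (unsigned I = 0, E = CB.arg_size(); I != E; ++I)
        N += CB.getArgOperand(I)->getType()->isPointerTy();
      return N;
    }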
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 3529923a9082..ec08287393de 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -28,6 +29,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Assumptions.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
@@ -146,6 +148,7 @@ PIPE_OPERATOR(AANoUndef)
PIPE_OPERATOR(AACallEdges)
PIPE_OPERATOR(AAFunctionReachability)
PIPE_OPERATOR(AAPointerInfo)
+PIPE_OPERATOR(AAAssumptionInfo)
#undef PIPE_OPERATOR
@@ -203,46 +206,25 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr,
<< "-bytes as " << *ResTy << "\n");
if (Offset) {
- SmallVector<Value *, 4> Indices;
- std::string GEPName = Ptr->getName().str() + ".0";
-
- // Add 0 index to look through the pointer.
- assert((uint64_t)Offset < DL.getTypeAllocSize(PtrElemTy) &&
- "Offset out of bounds");
- Indices.push_back(Constant::getNullValue(IRB.getInt32Ty()));
-
Type *Ty = PtrElemTy;
- do {
- auto *STy = dyn_cast<StructType>(Ty);
- if (!STy)
- // Non-aggregate type, we cast and make byte-wise progress now.
- break;
-
- const StructLayout *SL = DL.getStructLayout(STy);
- if (int64_t(SL->getSizeInBytes()) < Offset)
- break;
-
- uint64_t Idx = SL->getElementContainingOffset(Offset);
- assert(Idx < STy->getNumElements() && "Offset calculation error!");
- uint64_t Rem = Offset - SL->getElementOffset(Idx);
- Ty = STy->getElementType(Idx);
-
- LLVM_DEBUG(errs() << "Ty: " << *Ty << " Offset: " << Offset
- << " Idx: " << Idx << " Rem: " << Rem << "\n");
+ APInt IntOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
+ SmallVector<APInt> IntIndices = DL.getGEPIndicesForOffset(Ty, IntOffset);
- GEPName += "." + std::to_string(Idx);
- Indices.push_back(ConstantInt::get(IRB.getInt32Ty(), Idx));
- Offset = Rem;
- } while (Offset);
+ SmallVector<Value *, 4> ValIndices;
+ std::string GEPName = Ptr->getName().str();
+ for (const APInt &Index : IntIndices) {
+ ValIndices.push_back(IRB.getInt(Index));
+ GEPName += "." + std::to_string(Index.getZExtValue());
+ }
// Create a GEP for the indices collected above.
- Ptr = IRB.CreateGEP(PtrElemTy, Ptr, Indices, GEPName);
+ Ptr = IRB.CreateGEP(PtrElemTy, Ptr, ValIndices, GEPName);
// If an offset is left we use byte-wise adjustment.
- if (Offset) {
+ if (IntOffset != 0) {
Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy());
- Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt32(Offset),
- GEPName + ".b" + Twine(Offset));
+ Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(IntOffset),
+ GEPName + ".b" + Twine(IntOffset.getZExtValue()));
}
}
@@ -431,6 +413,7 @@ const Value *stripAndAccumulateMinimalOffsets(
};
return Val->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds,
+ /* AllowInvariant */ false,
AttributorAnalysis);
}
@@ -503,6 +486,7 @@ static void clampReturnedValueStates(
S ^= *T;
}
+namespace {
/// Helper class for generic deduction: return value -> returned position.
template <typename AAType, typename BaseType,
typename StateType = typename BaseType::StateType,
@@ -661,6 +645,7 @@ struct AACallSiteReturnedFromReturned : public BaseType {
return clampStateAndIndicateChange(S, AA.getState());
}
};
+} // namespace
/// Helper function to accumulate uses.
template <class AAType, typename StateType = typename AAType::StateType>
@@ -1051,6 +1036,7 @@ private:
BooleanState BS;
};
+namespace {
struct AAPointerInfoImpl
: public StateWrapper<AA::PointerInfo::State, AAPointerInfo> {
using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>;
@@ -1207,7 +1193,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
}
SmallVector<Value *, 8> Indices;
- for (Use &Idx : llvm::make_range(GEP->idx_begin(), GEP->idx_end())) {
+ for (Use &Idx : GEP->indices()) {
if (auto *CIdx = dyn_cast<ConstantInt>(Idx)) {
Indices.push_back(CIdx);
continue;
@@ -1244,7 +1230,11 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
}
// Check if the PHI operand is not dependent on the PHI itself.
- APInt Offset(DL.getIndexTypeSizeInBits(AssociatedValue.getType()), 0);
+ // TODO: This is not great as we look at the pointer type. However, it
+ // is unclear where the Offset size comes from with typeless pointers.
+ APInt Offset(
+ DL.getIndexSizeInBits(CurPtr->getType()->getPointerAddressSpace()),
+ 0);
if (&AssociatedValue == CurPtr->stripAndAccumulateConstantOffsets(
DL, Offset, /* AllowNonInbounds */ true)) {
if (Offset != PtrOI.Offset) {
@@ -2432,6 +2422,10 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
const size_t NoUBPrevSize = AssumedNoUBInsts.size();
auto InspectMemAccessInstForUB = [&](Instruction &I) {
+ // Lang ref now states volatile stores are not UB, so let's skip them.
+ if (I.isVolatile() && I.mayWriteToMemory())
+ return true;
+
// Skip instructions that are already saved.
if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
return true;
@@ -2511,7 +2505,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
Function *Callee = CB.getCalledFunction();
if (!Callee)
return true;
- for (unsigned idx = 0; idx < CB.getNumArgOperands(); idx++) {
+ for (unsigned idx = 0; idx < CB.arg_size(); idx++) {
// If current argument is known to be simplified to null pointer and the
// corresponding argument position is known to have nonnull attribute,
// the argument is poison. Furthermore, if the argument is poison and
@@ -3179,8 +3173,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
// value passed at this call site.
// TODO: AbstractCallSite
const auto &CB = cast<CallBase>(getAnchorValue());
- for (unsigned OtherArgNo = 0; OtherArgNo < CB.getNumArgOperands();
- OtherArgNo++)
+ for (unsigned OtherArgNo = 0; OtherArgNo < CB.arg_size(); OtherArgNo++)
if (mayAliasWithArgument(A, AAR, MemBehaviorAA, CB, OtherArgNo))
return false;
@@ -3398,6 +3391,10 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
}
bool isDeadStore(Attributor &A, StoreInst &SI) {
+ // Lang ref now states volatile stores are not UB/dead, so let's skip them.
+ if (SI.isVolatile())
+ return false;
+
bool UsedAssumedInformation = false;
SmallSetVector<Value *, 4> PotentialCopies;
if (!AA::getPotentialCopiesOfStoredValue(A, SI, PotentialCopies, *this,
@@ -5083,6 +5080,7 @@ struct AANoCaptureCallSiteReturned final : AANoCaptureImpl {
STATS_DECLTRACK_CSRET_ATTR(nocapture)
}
};
+} // namespace
/// ------------------ Value Simplify Attribute ----------------------------
@@ -5103,6 +5101,7 @@ bool ValueSimplifyStateType::unionAssumed(Optional<Value *> Other) {
return true;
}
+namespace {
struct AAValueSimplifyImpl : AAValueSimplify {
AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A)
: AAValueSimplify(IRP, A) {}
@@ -6508,7 +6507,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
auto IsCompatiblePrivArgOfDirectCS = [&](AbstractCallSite ACS) {
CallBase *DC = cast<CallBase>(ACS.getInstruction());
int DCArgNo = ACS.getCallArgOperandNo(ArgNo);
- assert(DCArgNo >= 0 && unsigned(DCArgNo) < DC->getNumArgOperands() &&
+ assert(DCArgNo >= 0 && unsigned(DCArgNo) < DC->arg_size() &&
"Expected a direct call operand for callback call operand");
LLVM_DEBUG({
@@ -7331,10 +7330,12 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U,
case Instruction::Store:
// Stores cause the NO_WRITES property to disappear if the use is the
- // pointer operand. Note that we do assume that capturing was taken care of
- // somewhere else.
+ // pointer operand. Note that while capturing was taken care of somewhere
+ // else, we need to deal with stores of the value that is not looked through.
if (cast<StoreInst>(UserI)->getPointerOperand() == U.get())
removeAssumedBits(NO_WRITES);
+ else
+ indicatePessimisticFixpoint();
return;
case Instruction::Call:
@@ -7380,6 +7381,7 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U,
if (UserI->mayWriteToMemory())
removeAssumedBits(NO_WRITES);
}
+} // namespace
/// -------------------- Memory Locations Attributes ---------------------------
/// Includes read-none, argmemonly, inaccessiblememonly,
@@ -7672,11 +7674,14 @@ void AAMemoryLocationImpl::categorizePtrValue(
assert(!isa<GEPOperator>(Obj) && "GEPs should have been stripped.");
if (isa<UndefValue>(Obj))
continue;
- if (auto *Arg = dyn_cast<Argument>(Obj)) {
- if (Arg->hasByValAttr())
- MLK = NO_LOCAL_MEM;
- else
- MLK = NO_ARGUMENT_MEM;
+ if (isa<Argument>(Obj)) {
+ // TODO: For now we do not treat byval arguments as local copies performed
+ // on the call edge, though, we should. To make that happen we need to
+ // teach various passes, e.g., DSE, about the copy effect of a byval. That
+ // would also allow us to mark functions only accessing byval arguments as
+ // readnone again, arguably their accesses have no effect outside of the
+ // function, like accesses to allocas.
+ MLK = NO_ARGUMENT_MEM;
} else if (auto *GV = dyn_cast<GlobalValue>(Obj)) {
// Reading constant memory is not treated as a read "effect" by the
// function attr pass so we won't neither. Constants defined by TBAA are
@@ -7722,7 +7727,7 @@ void AAMemoryLocationImpl::categorizePtrValue(
void AAMemoryLocationImpl::categorizeArgumentPointerLocations(
Attributor &A, CallBase &CB, AAMemoryLocation::StateType &AccessedLocs,
bool &Changed) {
- for (unsigned ArgNo = 0, E = CB.getNumArgOperands(); ArgNo < E; ++ArgNo) {
+ for (unsigned ArgNo = 0, E = CB.arg_size(); ArgNo < E; ++ArgNo) {
// Skip non-pointer arguments.
const Value *ArgOp = CB.getArgOperand(ArgNo);
@@ -8655,31 +8660,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
static bool calculateICmpInst(const ICmpInst *ICI, const APInt &LHS,
const APInt &RHS) {
- ICmpInst::Predicate Pred = ICI->getPredicate();
- switch (Pred) {
- case ICmpInst::ICMP_UGT:
- return LHS.ugt(RHS);
- case ICmpInst::ICMP_SGT:
- return LHS.sgt(RHS);
- case ICmpInst::ICMP_EQ:
- return LHS.eq(RHS);
- case ICmpInst::ICMP_UGE:
- return LHS.uge(RHS);
- case ICmpInst::ICMP_SGE:
- return LHS.sge(RHS);
- case ICmpInst::ICMP_ULT:
- return LHS.ult(RHS);
- case ICmpInst::ICMP_SLT:
- return LHS.slt(RHS);
- case ICmpInst::ICMP_NE:
- return LHS.ne(RHS);
- case ICmpInst::ICMP_ULE:
- return LHS.ule(RHS);
- case ICmpInst::ICMP_SLE:
- return LHS.sle(RHS);
- default:
- llvm_unreachable("Invalid ICmp predicate!");
- }
+ return ICmpInst::compare(LHS, RHS, ICI->getPredicate());
}
static APInt calculateCastInst(const CastInst *CI, const APInt &Src,
@@ -8719,25 +8700,25 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
case Instruction::Mul:
return LHS * RHS;
case Instruction::UDiv:
- if (RHS.isNullValue()) {
+ if (RHS.isZero()) {
SkipOperation = true;
return LHS;
}
return LHS.udiv(RHS);
case Instruction::SDiv:
- if (RHS.isNullValue()) {
+ if (RHS.isZero()) {
SkipOperation = true;
return LHS;
}
return LHS.sdiv(RHS);
case Instruction::URem:
- if (RHS.isNullValue()) {
+ if (RHS.isZero()) {
SkipOperation = true;
return LHS;
}
return LHS.urem(RHS);
case Instruction::SRem:
- if (RHS.isNullValue()) {
+ if (RHS.isZero()) {
SkipOperation = true;
return LHS;
}
@@ -9336,32 +9317,69 @@ struct AANoUndefCallSiteReturned final
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noundef) }
};
-struct AACallEdgesFunction : public AACallEdges {
- AACallEdgesFunction(const IRPosition &IRP, Attributor &A)
- : AACallEdges(IRP, A) {}
+struct AACallEdgesImpl : public AACallEdges {
+ AACallEdgesImpl(const IRPosition &IRP, Attributor &A) : AACallEdges(IRP, A) {}
+ virtual const SetVector<Function *> &getOptimisticEdges() const override {
+ return CalledFunctions;
+ }
+
+ virtual bool hasUnknownCallee() const override { return HasUnknownCallee; }
+
+ virtual bool hasNonAsmUnknownCallee() const override {
+ return HasUnknownCalleeNonAsm;
+ }
+
+ const std::string getAsStr() const override {
+ return "CallEdges[" + std::to_string(HasUnknownCallee) + "," +
+ std::to_string(CalledFunctions.size()) + "]";
+ }
+
+ void trackStatistics() const override {}
+
+protected:
+ void addCalledFunction(Function *Fn, ChangeStatus &Change) {
+ if (CalledFunctions.insert(Fn)) {
+ Change = ChangeStatus::CHANGED;
+ LLVM_DEBUG(dbgs() << "[AACallEdges] New call edge: " << Fn->getName()
+ << "\n");
+ }
+ }
+
+ void setHasUnknownCallee(bool NonAsm, ChangeStatus &Change) {
+ if (!HasUnknownCallee)
+ Change = ChangeStatus::CHANGED;
+ if (NonAsm && !HasUnknownCalleeNonAsm)
+ Change = ChangeStatus::CHANGED;
+ HasUnknownCalleeNonAsm |= NonAsm;
+ HasUnknownCallee = true;
+ }
+
+private:
+ /// Optimistic set of functions that might be called by this position.
+ SetVector<Function *> CalledFunctions;
+
+ /// Is there any call with an unknown callee.
+ bool HasUnknownCallee = false;
+
+ /// Is there any call with an unknown callee, excluding any inline asm.
+ bool HasUnknownCalleeNonAsm = false;
+};
+
+struct AACallEdgesCallSite : public AACallEdgesImpl {
+ AACallEdgesCallSite(const IRPosition &IRP, Attributor &A)
+ : AACallEdgesImpl(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Change = ChangeStatus::UNCHANGED;
- bool OldHasUnknownCallee = HasUnknownCallee;
- bool OldHasUnknownCalleeNonAsm = HasUnknownCalleeNonAsm;
-
- auto AddCalledFunction = [&](Function *Fn) {
- if (CalledFunctions.insert(Fn)) {
- Change = ChangeStatus::CHANGED;
- LLVM_DEBUG(dbgs() << "[AACallEdges] New call edge: " << Fn->getName()
- << "\n");
- }
- };
auto VisitValue = [&](Value &V, const Instruction *CtxI, bool &HasUnknown,
bool Stripped) -> bool {
if (Function *Fn = dyn_cast<Function>(&V)) {
- AddCalledFunction(Fn);
+ addCalledFunction(Fn, Change);
} else {
LLVM_DEBUG(dbgs() << "[AACallEdges] Unrecognized value: " << V << "\n");
- HasUnknown = true;
- HasUnknownCalleeNonAsm = true;
+ setHasUnknownCallee(true, Change);
}
// Explore all values.
@@ -9369,44 +9387,67 @@ struct AACallEdgesFunction : public AACallEdges {
};
// Process any value that we might call.
- auto ProcessCalledOperand = [&](Value *V, Instruction *Ctx) {
+ auto ProcessCalledOperand = [&](Value *V) {
+ bool DummyValue = false;
if (!genericValueTraversal<bool>(A, IRPosition::value(*V), *this,
- HasUnknownCallee, VisitValue, nullptr,
+ DummyValue, VisitValue, nullptr,
false)) {
// If we haven't gone through all values, assume that there are unknown
// callees.
- HasUnknownCallee = true;
- HasUnknownCalleeNonAsm = true;
+ setHasUnknownCallee(true, Change);
}
};
- auto ProcessCallInst = [&](Instruction &Inst) {
- CallBase &CB = static_cast<CallBase &>(Inst);
- if (CB.isInlineAsm()) {
- HasUnknownCallee = true;
- return true;
- }
+ CallBase *CB = static_cast<CallBase *>(getCtxI());
- // Process callee metadata if available.
- if (auto *MD = Inst.getMetadata(LLVMContext::MD_callees)) {
- for (auto &Op : MD->operands()) {
- Function *Callee = mdconst::extract_or_null<Function>(Op);
- if (Callee)
- AddCalledFunction(Callee);
- }
- // Callees metadata grantees that the called function is one of its
- // operands, So we are done.
- return true;
+ if (CB->isInlineAsm()) {
+ setHasUnknownCallee(false, Change);
+ return Change;
+ }
+
+ // Process callee metadata if available.
+ if (auto *MD = getCtxI()->getMetadata(LLVMContext::MD_callees)) {
+ for (auto &Op : MD->operands()) {
+ Function *Callee = mdconst::dyn_extract_or_null<Function>(Op);
+ if (Callee)
+ addCalledFunction(Callee, Change);
}
+ return Change;
+ }
- // The most simple case.
- ProcessCalledOperand(CB.getCalledOperand(), &Inst);
+ // The most simple case.
+ ProcessCalledOperand(CB->getCalledOperand());
- // Process callback functions.
- SmallVector<const Use *, 4u> CallbackUses;
- AbstractCallSite::getCallbackUses(CB, CallbackUses);
- for (const Use *U : CallbackUses)
- ProcessCalledOperand(U->get(), &Inst);
+ // Process callback functions.
+ SmallVector<const Use *, 4u> CallbackUses;
+ AbstractCallSite::getCallbackUses(*CB, CallbackUses);
+ for (const Use *U : CallbackUses)
+ ProcessCalledOperand(U->get());
+
+ return Change;
+ }
+};
+
+struct AACallEdgesFunction : public AACallEdgesImpl {
+ AACallEdgesFunction(const IRPosition &IRP, Attributor &A)
+ : AACallEdgesImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Change = ChangeStatus::UNCHANGED;
+
+ auto ProcessCallInst = [&](Instruction &Inst) {
+ CallBase &CB = static_cast<CallBase &>(Inst);
+
+ auto &CBEdges = A.getAAFor<AACallEdges>(
+ *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
+ if (CBEdges.hasNonAsmUnknownCallee())
+ setHasUnknownCallee(true, Change);
+ if (CBEdges.hasUnknownCallee())
+ setHasUnknownCallee(false, Change);
+
+ for (Function *F : CBEdges.getOptimisticEdges())
+ addCalledFunction(F, Change);
return true;
};
@@ -9417,155 +9458,323 @@ struct AACallEdgesFunction : public AACallEdges {
UsedAssumedInformation)) {
// If we haven't looked at all call like instructions, assume that there
// are unknown callees.
- HasUnknownCallee = true;
- HasUnknownCalleeNonAsm = true;
+ setHasUnknownCallee(true, Change);
}
- // Track changes.
- if (OldHasUnknownCallee != HasUnknownCallee ||
- OldHasUnknownCalleeNonAsm != HasUnknownCalleeNonAsm)
- Change = ChangeStatus::CHANGED;
-
return Change;
}
+};
- virtual const SetVector<Function *> &getOptimisticEdges() const override {
- return CalledFunctions;
- };
+struct AAFunctionReachabilityFunction : public AAFunctionReachability {
+private:
+ struct QuerySet {
+ void markReachable(Function *Fn) {
+ Reachable.insert(Fn);
+ Unreachable.erase(Fn);
+ }
+
+ ChangeStatus update(Attributor &A, const AAFunctionReachability &AA,
+ ArrayRef<const AACallEdges *> AAEdgesList) {
+ ChangeStatus Change = ChangeStatus::UNCHANGED;
+
+ for (auto *AAEdges : AAEdgesList) {
+ if (AAEdges->hasUnknownCallee()) {
+ if (!CanReachUnknownCallee)
+ Change = ChangeStatus::CHANGED;
+ CanReachUnknownCallee = true;
+ return Change;
+ }
+ }
- virtual bool hasUnknownCallee() const override { return HasUnknownCallee; }
+ for (Function *Fn : make_early_inc_range(Unreachable)) {
+ if (checkIfReachable(A, AA, AAEdgesList, Fn)) {
+ Change = ChangeStatus::CHANGED;
+ markReachable(Fn);
+ }
+ }
+ return Change;
+ }
- virtual bool hasNonAsmUnknownCallee() const override {
- return HasUnknownCalleeNonAsm;
- }
+ bool isReachable(Attributor &A, const AAFunctionReachability &AA,
+ ArrayRef<const AACallEdges *> AAEdgesList, Function *Fn) {
+ // Assume that we can reach the function.
+ // TODO: Be more specific with the unknown callee.
+ if (CanReachUnknownCallee)
+ return true;
- const std::string getAsStr() const override {
- return "CallEdges[" + std::to_string(HasUnknownCallee) + "," +
- std::to_string(CalledFunctions.size()) + "]";
- }
+ if (Reachable.count(Fn))
+ return true;
- void trackStatistics() const override {}
+ if (Unreachable.count(Fn))
+ return false;
- /// Optimistic set of functions that might be called by this function.
- SetVector<Function *> CalledFunctions;
+ // We need to assume that this function can't reach Fn to prevent
+ // an infinite loop if this function is recursive.
+ Unreachable.insert(Fn);
- /// Is there any call with a unknown callee.
- bool HasUnknownCallee = false;
+ bool Result = checkIfReachable(A, AA, AAEdgesList, Fn);
+ if (Result)
+ markReachable(Fn);
+ return Result;
+ }
- /// Is there any call with a unknown callee, excluding any inline asm.
- bool HasUnknownCalleeNonAsm = false;
-};
+ bool checkIfReachable(Attributor &A, const AAFunctionReachability &AA,
+ ArrayRef<const AACallEdges *> AAEdgesList,
+ Function *Fn) const {
-struct AAFunctionReachabilityFunction : public AAFunctionReachability {
- AAFunctionReachabilityFunction(const IRPosition &IRP, Attributor &A)
- : AAFunctionReachability(IRP, A) {}
+ // Handle the most trivial case first.
+ for (auto *AAEdges : AAEdgesList) {
+ const SetVector<Function *> &Edges = AAEdges->getOptimisticEdges();
- bool canReach(Attributor &A, Function *Fn) const override {
- // Assume that we can reach any function if we can reach a call with
- // unknown callee.
- if (CanReachUnknownCallee)
- return true;
+ if (Edges.count(Fn))
+ return true;
+ }
- if (ReachableQueries.count(Fn))
- return true;
+ SmallVector<const AAFunctionReachability *, 8> Deps;
+ for (auto &AAEdges : AAEdgesList) {
+ const SetVector<Function *> &Edges = AAEdges->getOptimisticEdges();
+
+ for (Function *Edge : Edges) {
+ // We don't need a dependency if the result is reachable.
+ const AAFunctionReachability &EdgeReachability =
+ A.getAAFor<AAFunctionReachability>(
+ AA, IRPosition::function(*Edge), DepClassTy::NONE);
+ Deps.push_back(&EdgeReachability);
+
+ if (EdgeReachability.canReach(A, Fn))
+ return true;
+ }
+ }
+
+ // The result is false for now, set dependencies and leave.
+ for (auto Dep : Deps)
+ A.recordDependence(AA, *Dep, DepClassTy::REQUIRED);
- if (UnreachableQueries.count(Fn))
return false;
+ }
+
+ /// Set of functions that we know for sure is reachable.
+ DenseSet<Function *> Reachable;
+
+ /// Set of functions that are unreachable, but might become reachable.
+ DenseSet<Function *> Unreachable;
+
+ /// If we can reach a function with a call to an unknown function we assume
+ /// that we can reach any function.
+ bool CanReachUnknownCallee = false;
+ };
+public:
+ AAFunctionReachabilityFunction(const IRPosition &IRP, Attributor &A)
+ : AAFunctionReachability(IRP, A) {}
+
+ bool canReach(Attributor &A, Function *Fn) const override {
const AACallEdges &AAEdges =
A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::REQUIRED);
- const SetVector<Function *> &Edges = AAEdges.getOptimisticEdges();
- bool Result = checkIfReachable(A, Edges, Fn);
+ // Attributor returns attributes as const, so this function has to be
+ // const for users of this attribute to use it without having to do
+ // a const_cast.
+ // This is a hack for us to be able to cache queries.
+ auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
+ bool Result =
+ NonConstThis->WholeFunction.isReachable(A, *this, {&AAEdges}, Fn);
+
+ return Result;
+ }
+
+ /// Can \p CB reach \p Fn
+ bool canReach(Attributor &A, CallBase &CB, Function *Fn) const override {
+ const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
+ *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
// Attributor returns attributes as const, so this function has to be
// const for users of this attribute to use it without having to do
// a const_cast.
// This is a hack for us to be able to cache queries.
auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
+ QuerySet &CBQuery = NonConstThis->CBQueries[&CB];
- if (Result)
- NonConstThis->ReachableQueries.insert(Fn);
- else
- NonConstThis->UnreachableQueries.insert(Fn);
+ bool Result = CBQuery.isReachable(A, *this, {&AAEdges}, Fn);
return Result;
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
- if (CanReachUnknownCallee)
- return ChangeStatus::UNCHANGED;
-
const AACallEdges &AAEdges =
A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::REQUIRED);
- const SetVector<Function *> &Edges = AAEdges.getOptimisticEdges();
ChangeStatus Change = ChangeStatus::UNCHANGED;
- if (AAEdges.hasUnknownCallee()) {
- bool OldCanReachUnknown = CanReachUnknownCallee;
- CanReachUnknownCallee = true;
- return OldCanReachUnknown ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
- }
+ Change |= WholeFunction.update(A, *this, {&AAEdges});
- // Check if any of the unreachable functions become reachable.
- for (auto Current = UnreachableQueries.begin();
- Current != UnreachableQueries.end();) {
- if (!checkIfReachable(A, Edges, *Current)) {
- Current++;
- continue;
- }
- ReachableQueries.insert(*Current);
- UnreachableQueries.erase(*Current++);
- Change = ChangeStatus::CHANGED;
+ for (auto CBPair : CBQueries) {
+ const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
+ *this, IRPosition::callsite_function(*CBPair.first),
+ DepClassTy::REQUIRED);
+
+ Change |= CBPair.second.update(A, *this, {&AAEdges});
}
return Change;
}
const std::string getAsStr() const override {
- size_t QueryCount = ReachableQueries.size() + UnreachableQueries.size();
+ size_t QueryCount =
+ WholeFunction.Reachable.size() + WholeFunction.Unreachable.size();
- return "FunctionReachability [" + std::to_string(ReachableQueries.size()) +
- "," + std::to_string(QueryCount) + "]";
+ return "FunctionReachability [" +
+ std::to_string(WholeFunction.Reachable.size()) + "," +
+ std::to_string(QueryCount) + "]";
}
void trackStatistics() const override {}
private:
- bool canReachUnknownCallee() const override { return CanReachUnknownCallee; }
+ bool canReachUnknownCallee() const override {
+ return WholeFunction.CanReachUnknownCallee;
+ }
- bool checkIfReachable(Attributor &A, const SetVector<Function *> &Edges,
- Function *Fn) const {
- if (Edges.count(Fn))
- return true;
+ /// Used to answer if the whole function can reach a specific function.
+ QuerySet WholeFunction;
- for (Function *Edge : Edges) {
- // We don't need a dependency if the result is reachable.
- const AAFunctionReachability &EdgeReachability =
- A.getAAFor<AAFunctionReachability>(*this, IRPosition::function(*Edge),
- DepClassTy::NONE);
+ /// Used to answer if a call base inside this function can reach a specific
+ /// function.
+ DenseMap<CallBase *, QuerySet> CBQueries;
+};
- if (EdgeReachability.canReach(A, Fn))
- return true;
- }
- for (Function *Fn : Edges)
- A.getAAFor<AAFunctionReachability>(*this, IRPosition::function(*Fn),
- DepClassTy::REQUIRED);
+/// ---------------------- Assumption Propagation ------------------------------
+struct AAAssumptionInfoImpl : public AAAssumptionInfo {
+ AAAssumptionInfoImpl(const IRPosition &IRP, Attributor &A,
+ const DenseSet<StringRef> &Known)
+ : AAAssumptionInfo(IRP, A, Known) {}
- return false;
+ bool hasAssumption(const StringRef Assumption) const override {
+ return isValidState() && setContains(Assumption);
}
- /// Set of functions that we know for sure is reachable.
- SmallPtrSet<Function *, 8> ReachableQueries;
+ /// See AbstractAttribute::getAsStr()
+ const std::string getAsStr() const override {
+ const SetContents &Known = getKnown();
+ const SetContents &Assumed = getAssumed();
+
+ const std::string KnownStr =
+ llvm::join(Known.getSet().begin(), Known.getSet().end(), ",");
+ const std::string AssumedStr =
+ (Assumed.isUniversal())
+ ? "Universal"
+ : llvm::join(Assumed.getSet().begin(), Assumed.getSet().end(), ",");
+
+ return "Known [" + KnownStr + "]," + " Assumed [" + AssumedStr + "]";
+ }
+};
+
+/// Propagates assumption information from parent functions to all of their
+/// successors. An assumption can be propagated if the containing function
+/// dominates the called function.
+///
+/// We start with a "known" set of assumptions already valid for the associated
+/// function and an "assumed" set that initially contains all possible
+/// assumptions. The assumed set is inter-procedurally updated by narrowing its
+/// contents as concrete values are known. The concrete values are seeded by the
+/// first nodes that are either entries into the call graph, or contain no
+/// assumptions. Each node is updated as the intersection of the assumed state
+/// with all of its predecessors.
+struct AAAssumptionInfoFunction final : AAAssumptionInfoImpl {
+ AAAssumptionInfoFunction(const IRPosition &IRP, Attributor &A)
+ : AAAssumptionInfoImpl(IRP, A,
+ getAssumptions(*IRP.getAssociatedFunction())) {}
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ const auto &Assumptions = getKnown();
+
+ // Don't manifest a universal set if it somehow made it here.
+ if (Assumptions.isUniversal())
+ return ChangeStatus::UNCHANGED;
+
+ Function *AssociatedFunction = getAssociatedFunction();
+
+ bool Changed = addAssumptions(*AssociatedFunction, Assumptions.getSet());
+
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ bool Changed = false;
+
+ auto CallSitePred = [&](AbstractCallSite ACS) {
+ const auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>(
+ *this, IRPosition::callsite_function(*ACS.getInstruction()),
+ DepClassTy::REQUIRED);
+ // Get the set of assumptions shared by all of this function's callers.
+ Changed |= getIntersection(AssumptionAA.getAssumed());
+ return !getAssumed().empty() || !getKnown().empty();
+ };
+
+ bool AllCallSitesKnown;
+ // Get the intersection of all assumptions held by this node's predecessors.
+ // If we don't know all the call sites then this is either an entry into the
+ // call graph or an empty node. This node is known to only contain its own
+ // assumptions and can be propagated to its successors.
+ if (!A.checkForAllCallSites(CallSitePred, *this, true, AllCallSitesKnown))
+ return indicatePessimisticFixpoint();
- /// Set of functions that are unreachable, but might become reachable.
- SmallPtrSet<Function *, 8> UnreachableQueries;
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
+
+ void trackStatistics() const override {}
+};
+
+/// Assumption Info defined for call sites.
+struct AAAssumptionInfoCallSite final : AAAssumptionInfoImpl {
+
+ AAAssumptionInfoCallSite(const IRPosition &IRP, Attributor &A)
+ : AAAssumptionInfoImpl(IRP, A, getInitialAssumptions(IRP)) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ const IRPosition &FnPos = IRPosition::function(*getAnchorScope());
+ A.getAAFor<AAAssumptionInfo>(*this, FnPos, DepClassTy::REQUIRED);
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ // Don't manifest a universal set if it somehow made it here.
+ if (getKnown().isUniversal())
+ return ChangeStatus::UNCHANGED;
- /// If we can reach a function with a call to a unknown function we assume
- /// that we can reach any function.
- bool CanReachUnknownCallee = false;
+ CallBase &AssociatedCall = cast<CallBase>(getAssociatedValue());
+ bool Changed = addAssumptions(AssociatedCall, getAssumed().getSet());
+
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ const IRPosition &FnPos = IRPosition::function(*getAnchorScope());
+ auto &AssumptionAA =
+ A.getAAFor<AAAssumptionInfo>(*this, FnPos, DepClassTy::REQUIRED);
+ bool Changed = getIntersection(AssumptionAA.getAssumed());
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+
+private:
+ /// Helper to initialize the known set as all the assumptions this call and
+ /// the callee contain.
+ DenseSet<StringRef> getInitialAssumptions(const IRPosition &IRP) {
+ const CallBase &CB = cast<CallBase>(IRP.getAssociatedValue());
+ auto Assumptions = getAssumptions(CB);
+ if (Function *F = IRP.getAssociatedFunction())
+ set_union(Assumptions, getAssumptions(*F));
+ return Assumptions;
+ }
};
} // namespace
@@ -9603,6 +9812,7 @@ const char AANoUndef::ID = 0;
const char AACallEdges::ID = 0;
const char AAFunctionReachability::ID = 0;
const char AAPointerInfo::ID = 0;
+const char AAAssumptionInfo::ID = 0;
// Macro magic to create the static generator function for attributes that
// follow the naming scheme.
@@ -9704,6 +9914,8 @@ CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAWillReturn)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoReturn)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReturnedValues)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryLocation)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AACallEdges)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAssumptionInfo)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias)
@@ -9723,7 +9935,6 @@ CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFree)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReachability)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAUndefinedBehavior)
-CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AACallEdges)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAFunctionReachability)
CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior)
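[Editor's note] Two of the simplifications in AttributorAttributes are pure API migrations: the open-coded ten-way predicate switch collapses into the static ICmpInst::compare helper, and APInt::isNullValue() is replaced by isZero() in the division guards. A compressed sketch of both, assuming the same LLVM 14-era headers; foldConstICmp and foldUDiv are illustrative names:

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/Optional.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Evaluate an icmp over two constant operands via the static helper,
    // instead of switching over every ICmpInst::Predicate by hand.
    static bool foldConstICmp(const ICmpInst &ICI, const APInt &LHS,
                              const APInt &RHS) {
      return ICmpInst::compare(LHS, RHS, ICI.getPredicate());
    }

    // Guard a constant udiv the way the patch does, with isZero().
    static Optional<APInt> foldUDiv(const APInt &LHS, const APInt &RHS) {
      if (RHS.isZero()) // renamed from isNullValue()
        return None;    // division by zero: skip the operation
      return LHS.udiv(RHS);
    }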
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 8e81f4bad4af..178d3f41963e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -153,33 +153,30 @@ static bool mergeConstants(Module &M) {
// were just merged.
while (true) {
// Find the canonical constants others will be merged with.
- for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
- GVI != E; ) {
- GlobalVariable *GV = &*GVI++;
-
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
// If this GV is dead, remove it.
- GV->removeDeadConstantUsers();
- if (GV->use_empty() && GV->hasLocalLinkage()) {
- GV->eraseFromParent();
+ GV.removeDeadConstantUsers();
+ if (GV.use_empty() && GV.hasLocalLinkage()) {
+ GV.eraseFromParent();
++ChangesMade;
continue;
}
- if (isUnmergeableGlobal(GV, UsedGlobals))
+ if (isUnmergeableGlobal(&GV, UsedGlobals))
continue;
// This transformation is legal for weak ODR globals in the sense it
// doesn't change semantics, but we really don't want to perform it
// anyway; it's likely to pessimize code generation, and some tools
// (like the Darwin linker in cases involving CFString) don't expect it.
- if (GV->isWeakForLinker())
+ if (GV.isWeakForLinker())
continue;
// Don't touch globals with metadata other than !dbg.
- if (hasMetadataOtherThanDebugLoc(GV))
+ if (hasMetadataOtherThanDebugLoc(&GV))
continue;
- Constant *Init = GV->getInitializer();
+ Constant *Init = GV.getInitializer();
// Check to see if the initializer is already known.
GlobalVariable *&Slot = CMap[Init];
@@ -188,9 +185,9 @@ static bool mergeConstants(Module &M) {
// replace with the current one. If the current is externally visible
// it cannot be replaced, but can be the canonical constant we merge with.
bool FirstConstantFound = !Slot;
- if (FirstConstantFound || IsBetterCanonical(*GV, *Slot)) {
- Slot = GV;
- LLVM_DEBUG(dbgs() << "Cmap[" << *Init << "] = " << GV->getName()
+ if (FirstConstantFound || IsBetterCanonical(GV, *Slot)) {
+ Slot = &GV;
+ LLVM_DEBUG(dbgs() << "Cmap[" << *Init << "] = " << GV.getName()
<< (FirstConstantFound ? "\n" : " (updated)\n"));
}
}
@@ -199,18 +196,15 @@ static bool mergeConstants(Module &M) {
// SameContentReplacements vector. We cannot do the replacement in this pass
// because doing so may cause initializers of other globals to be rewritten,
// invalidating the Constant* pointers in CMap.
- for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
- GVI != E; ) {
- GlobalVariable *GV = &*GVI++;
-
- if (isUnmergeableGlobal(GV, UsedGlobals))
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
+ if (isUnmergeableGlobal(&GV, UsedGlobals))
continue;
// We can only replace constant with local linkage.
- if (!GV->hasLocalLinkage())
+ if (!GV.hasLocalLinkage())
continue;
- Constant *Init = GV->getInitializer();
+ Constant *Init = GV.getInitializer();
// Check to see if the initializer is already known.
auto Found = CMap.find(Init);
@@ -218,16 +212,16 @@ static bool mergeConstants(Module &M) {
continue;
GlobalVariable *Slot = Found->second;
- if (Slot == GV)
+ if (Slot == &GV)
continue;
- if (makeMergeable(GV, Slot) == CanMerge::No)
+ if (makeMergeable(&GV, Slot) == CanMerge::No)
continue;
// Make all uses of the duplicate constant use the canonical version.
- LLVM_DEBUG(dbgs() << "Will replace: @" << GV->getName() << " -> @"
+ LLVM_DEBUG(dbgs() << "Will replace: @" << GV.getName() << " -> @"
<< Slot->getName() << "\n");
- SameContentReplacements.push_back(std::make_pair(GV, Slot));
+ SameContentReplacements.push_back(std::make_pair(&GV, Slot));
}
// Now that we have figured out which replacements must be made, do them all
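[Editor's note] Both loops in mergeConstants now use llvm::make_early_inc_range, which advances the underlying iterator before the loop body runs, so the body may erase the current global without the manual GVI++ pre-increment dance. A minimal sketch of the idiom; pruneDeadGlobals is an illustrative name, not from the patch:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Erase dead local globals while iterating over the module; erasing the
    // current element is safe because the range increments eagerly.
    static unsigned pruneDeadGlobals(Module &M) {
      unsigned NumErased = 0;
      for (GlobalVariable &GV : make_early_inc_range(M.globals())) {
        GV.removeDeadConstantUsers();
        if (GV.use_empty() && GV.hasLocalLinkage()) {
          GV.eraseFromParent();
          ++NumErased;
        }
      }
      return NumErased;
    }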
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index d95fd55870f8..fb9ab7954e36 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -175,8 +175,8 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
// to pass in a smaller number of arguments into the new function.
//
std::vector<Value *> Args;
- for (Value::user_iterator I = Fn.user_begin(), E = Fn.user_end(); I != E; ) {
- CallBase *CB = dyn_cast<CallBase>(*I++);
+ for (User *U : llvm::make_early_inc_range(Fn.users())) {
+ CallBase *CB = dyn_cast<CallBase>(U);
if (!CB)
continue;
@@ -188,9 +188,9 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
if (!PAL.isEmpty()) {
SmallVector<AttributeSet, 8> ArgAttrs;
for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo)
- ArgAttrs.push_back(PAL.getParamAttributes(ArgNo));
- PAL = AttributeList::get(Fn.getContext(), PAL.getFnAttributes(),
- PAL.getRetAttributes(), ArgAttrs);
+ ArgAttrs.push_back(PAL.getParamAttrs(ArgNo));
+ PAL = AttributeList::get(Fn.getContext(), PAL.getFnAttrs(),
+ PAL.getRetAttrs(), ArgAttrs);
}
SmallVector<OperandBundleDef, 1> OpBundles;
@@ -762,8 +762,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
if (LiveValues.erase(Arg)) {
Params.push_back(I->getType());
ArgAlive[ArgI] = true;
- ArgAttrVec.push_back(PAL.getParamAttributes(ArgI));
- HasLiveReturnedArg |= PAL.hasParamAttribute(ArgI, Attribute::Returned);
+ ArgAttrVec.push_back(PAL.getParamAttrs(ArgI));
+ HasLiveReturnedArg |= PAL.hasParamAttr(ArgI, Attribute::Returned);
} else {
++NumArgumentsEliminated;
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Removing argument "
@@ -838,7 +838,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
assert(NRetTy && "No new return type found?");
// The existing function return attributes.
- AttrBuilder RAttrs(PAL.getRetAttributes());
+ AttrBuilder RAttrs(PAL.getRetAttrs());
// Remove any incompatible attributes, but only if we removed all return
// values. Otherwise, ensure that we don't have any conflicting attributes
@@ -853,8 +853,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
AttributeSet RetAttrs = AttributeSet::get(F->getContext(), RAttrs);
// Strip allocsize attributes. They might refer to the deleted arguments.
- AttributeSet FnAttrs = PAL.getFnAttributes().removeAttribute(
- F->getContext(), Attribute::AllocSize);
+ AttributeSet FnAttrs =
+ PAL.getFnAttrs().removeAttribute(F->getContext(), Attribute::AllocSize);
// Reconstruct the AttributesList based on the vector we constructed.
assert(ArgAttrVec.size() == Params.size());
@@ -889,7 +889,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Adjust the call return attributes in case the function was changed to
// return void.
- AttrBuilder RAttrs(CallPAL.getRetAttributes());
+ AttrBuilder RAttrs(CallPAL.getRetAttrs());
RAttrs.remove(AttributeFuncs::typeIncompatible(NRetTy));
AttributeSet RetAttrs = AttributeSet::get(F->getContext(), RAttrs);
@@ -903,7 +903,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
if (ArgAlive[Pi]) {
Args.push_back(*I);
// Get original parameter attributes, but skip return attributes.
- AttributeSet Attrs = CallPAL.getParamAttributes(Pi);
+ AttributeSet Attrs = CallPAL.getParamAttrs(Pi);
if (NRetTy != RetTy && Attrs.hasAttribute(Attribute::Returned)) {
// If the return type has changed, then get rid of 'returned' on the
// call site. The alternative is to make all 'returned' attributes on
@@ -922,7 +922,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Push any varargs arguments on the list. Don't forget their attributes.
for (auto E = CB.arg_end(); I != E; ++I, ++Pi) {
Args.push_back(*I);
- ArgAttrVec.push_back(CallPAL.getParamAttributes(Pi));
+ ArgAttrVec.push_back(CallPAL.getParamAttrs(Pi));
}
// Reconstruct the AttributesList based on the vector we constructed.
@@ -930,7 +930,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Again, be sure to remove any allocsize attributes, since their indices
// may now be incorrect.
- AttributeSet FnAttrs = CallPAL.getFnAttributes().removeAttribute(
+ AttributeSet FnAttrs = CallPAL.getFnAttrs().removeAttribute(
F->getContext(), Attribute::AllocSize);
AttributeList NewCallPAL = AttributeList::get(
@@ -1094,11 +1094,9 @@ PreservedAnalyses DeadArgumentEliminationPass::run(Module &M,
// fused with the next loop, because deleting a function invalidates
// information computed while surveying other functions.
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Deleting dead varargs\n");
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
- Function &F = *I++;
+ for (Function &F : llvm::make_early_inc_range(M))
if (F.getFunctionType()->isVarArg())
Changed |= DeleteDeadVarargs(F);
- }
// Second phase: loop through the module, determining which arguments are live.
// We assume all arguments are dead unless proven otherwise (allowing us to
@@ -1109,13 +1107,10 @@ PreservedAnalyses DeadArgumentEliminationPass::run(Module &M,
SurveyFunction(F);
// Now, remove all dead arguments and return values from each function in
- // turn.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
- // Increment now, because the function will probably get removed (ie.
- // replaced by a new one).
- Function *F = &*I++;
- Changed |= RemoveDeadStuffFromFunction(F);
- }
+ // turn. We use make_early_inc_range here because functions will probably get
+ // removed (i.e. replaced by new ones).
+ for (Function &F : llvm::make_early_inc_range(M))
+ Changed |= RemoveDeadStuffFromFunction(&F);
// Finally, look for any unused parameters in functions with non-local
// linkage and replace the passed in parameters with undef.
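The attribute-accessor renames above (getParamAttributes to getParamAttrs, getFnAttributes to getFnAttrs, getRetAttributes to getRetAttrs) leave DAE's rebuild-the-list pattern intact. A sketch of that pattern with the new names, assuming a hypothetical ArgAlive[i] that marks parameter i of F as live:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// Rebuild an AttributeList keeping only the live parameters, mirroring
// how DAE remaps attributes after dropping dead arguments.
static AttributeList keepLiveParamAttrs(Function &F, ArrayRef<bool> ArgAlive) {
  AttributeList PAL = F.getAttributes();
  SmallVector<AttributeSet, 8> ArgAttrs;
  for (unsigned I = 0, E = F.arg_size(); I != E; ++I)
    if (ArgAlive[I])
      ArgAttrs.push_back(PAL.getParamAttrs(I)); // new accessor name
  return AttributeList::get(F.getContext(), PAL.getFnAttrs(),
                            PAL.getRetAttrs(), ArgAttrs);
}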
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ExtractGV.cpp
index ba0efd46af16..387f114f6ffa 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -121,32 +121,27 @@ namespace {
}
// Visit the Aliases.
- for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E;) {
- Module::alias_iterator CurI = I;
- ++I;
-
- bool Delete = deleteStuff == (bool)Named.count(&*CurI);
- makeVisible(*CurI, Delete);
+ for (GlobalAlias &GA : llvm::make_early_inc_range(M.aliases())) {
+ bool Delete = deleteStuff == (bool)Named.count(&GA);
+ makeVisible(GA, Delete);
if (Delete) {
- Type *Ty = CurI->getValueType();
- CurI->removeFromParent();
+ Type *Ty = GA.getValueType();
+ GA.removeFromParent();
llvm::Value *Declaration;
if (FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
- Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage,
- CurI->getAddressSpace(),
- CurI->getName(), &M);
+ Declaration =
+ Function::Create(FTy, GlobalValue::ExternalLinkage,
+ GA.getAddressSpace(), GA.getName(), &M);
} else {
Declaration =
- new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage,
- nullptr, CurI->getName());
-
+ new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage,
+ nullptr, GA.getName());
}
- CurI->replaceAllUsesWith(Declaration);
- delete &*CurI;
+ GA.replaceAllUsesWith(Declaration);
+ delete &GA;
}
}
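Untangling the hunk above: when a selected alias is deleted, ExtractGV keeps the module linkable by replacing it with an external declaration of the aliasee's value type. The net effect, written out linearly (a sketch, assuming GA is the alias being dropped from M):

#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static GlobalValue *makeExternalStub(Module &M, GlobalAlias &GA) {
  Type *Ty = GA.getValueType();
  GlobalValue *Decl;
  if (auto *FTy = dyn_cast<FunctionType>(Ty))
    Decl = Function::Create(FTy, GlobalValue::ExternalLinkage,
                            GA.getAddressSpace(), GA.getName(), &M);
  else
    Decl = new GlobalVariable(M, Ty, /*isConstant=*/false,
                              GlobalValue::ExternalLinkage,
                              /*Initializer=*/nullptr, GA.getName());
  GA.replaceAllUsesWith(Decl); // references now bind at link time
  return Decl;
}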
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 47fdf042f9d4..16d00a0c89e1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -50,14 +50,14 @@ static void forceAttributes(Function &F) {
return Kind;
};
- for (auto &S : ForceAttributes) {
+ for (const auto &S : ForceAttributes) {
auto Kind = ParseFunctionAndAttr(S);
if (Kind == Attribute::None || F.hasFnAttribute(Kind))
continue;
F.addFnAttr(Kind);
}
- for (auto &S : ForceRemoveAttributes) {
+ for (const auto &S : ForceRemoveAttributes) {
auto Kind = ParseFunctionAndAttr(S);
if (Kind == Attribute::None || !F.hasFnAttribute(Kind))
continue;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index ca8660a98ded..cde78713b554 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -14,10 +14,12 @@
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -82,6 +84,11 @@ STATISTIC(NumNoFree, "Number of functions marked as nofree");
STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
STATISTIC(NumNoSync, "Number of functions marked as nosync");
+STATISTIC(NumThinLinkNoRecurse,
+ "Number of functions marked as norecurse during thinlink");
+STATISTIC(NumThinLinkNoUnwind,
+ "Number of functions marked as nounwind during thinlink");
+
static cl::opt<bool> EnableNonnullArgPropagation(
"enable-nonnull-arg-prop", cl::init(true), cl::Hidden,
cl::desc("Try to propagate nonnull argument attributes from callsites to "
@@ -95,6 +102,10 @@ static cl::opt<bool> DisableNoFreeInference(
"disable-nofree-inference", cl::Hidden,
cl::desc("Stop inferring nofree attribute during function-attrs pass"));
+static cl::opt<bool> DisableThinLTOPropagation(
+ "disable-thinlto-funcattrs", cl::init(true), cl::Hidden,
+ cl::desc("Don't propagate function-attrs in thinLTO"));
+
namespace {
using SCCNodeSet = SmallSetVector<Function *, 8>;
@@ -131,12 +142,10 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
// Scan the function body for instructions that may read or write memory.
bool ReadsMemory = false;
bool WritesMemory = false;
- for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
- Instruction *I = &*II;
-
+ for (Instruction &I : instructions(F)) {
// Some instructions can be ignored even if they read or write memory.
// Detect these now, skipping to the next instruction if one is found.
- if (auto *Call = dyn_cast<CallBase>(I)) {
+ if (auto *Call = dyn_cast<CallBase>(&I)) {
// Ignore calls to functions in the same SCC, as long as the call sites
// don't have operand bundles. Calls with operand bundles are allowed to
// have memory effects not described by the memory effects of the call
@@ -170,14 +179,13 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
// Check whether all pointer arguments point to local memory, and
// ignore calls that only access local memory.
- for (auto CI = Call->arg_begin(), CE = Call->arg_end(); CI != CE; ++CI) {
- Value *Arg = *CI;
+ for (const Use &U : Call->args()) {
+ const Value *Arg = U;
if (!Arg->getType()->isPtrOrPtrVectorTy())
continue;
- AAMDNodes AAInfo;
- I->getAAMetadata(AAInfo);
- MemoryLocation Loc = MemoryLocation::getBeforeOrAfter(Arg, AAInfo);
+ MemoryLocation Loc =
+ MemoryLocation::getBeforeOrAfter(Arg, I.getAAMetadata());
// Skip accesses to local or constant memory as they don't impact the
// externally visible mod/ref behavior.
@@ -192,21 +200,21 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
ReadsMemory = true;
}
continue;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
// Ignore non-volatile loads from local memory. (Atomic is okay here.)
if (!LI->isVolatile()) {
MemoryLocation Loc = MemoryLocation::get(LI);
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
// Ignore non-volatile stores to local memory. (Atomic is okay here.)
if (!SI->isVolatile()) {
MemoryLocation Loc = MemoryLocation::get(SI);
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
- } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
+ } else if (VAArgInst *VI = dyn_cast<VAArgInst>(&I)) {
// Ignore vaargs on local memory.
MemoryLocation Loc = MemoryLocation::get(VI);
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
@@ -217,10 +225,10 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
// read or write memory.
//
// Writes memory, remember that.
- WritesMemory |= I->mayWriteToMemory();
+ WritesMemory |= I.mayWriteToMemory();
// If this instruction may read memory, remember that.
- ReadsMemory |= I->mayReadFromMemory();
+ ReadsMemory |= I.mayReadFromMemory();
}
if (WritesMemory) {
@@ -240,7 +248,8 @@ MemoryAccessKind llvm::computeFunctionBodyMemoryAccess(Function &F,
/// Deduce readonly/readnone attributes for the SCC.
template <typename AARGetterT>
-static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
+static void addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
+ SmallSet<Function *, 8> &Changed) {
// Check if any of the functions in the SCC read or write memory. If they
// write memory then they can't be marked readnone or readonly.
bool ReadsMemory = false;
@@ -255,7 +264,7 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(),
AAR, SCCNodes)) {
case MAK_MayWrite:
- return false;
+ return;
case MAK_ReadOnly:
ReadsMemory = true;
break;
@@ -271,11 +280,10 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
// If the SCC contains both functions that read and functions that write, then
// we cannot add readonly attributes.
if (ReadsMemory && WritesMemory)
- return false;
+ return;
// Success! Functions in this SCC do not access memory, or only read memory.
// Give them the appropriate attribute.
- bool MadeChange = false;
for (Function *F : SCCNodes) {
if (F->doesNotAccessMemory())
@@ -289,7 +297,7 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
if (F->doesNotReadMemory() && WritesMemory)
continue;
- MadeChange = true;
+ Changed.insert(F);
// Clear out any existing attributes.
AttrBuilder AttrsToRemove;
@@ -303,7 +311,7 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
AttrsToRemove.addAttribute(Attribute::InaccessibleMemOnly);
AttrsToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
}
- F->removeAttributes(AttributeList::FunctionIndex, AttrsToRemove);
+ F->removeFnAttrs(AttrsToRemove);
// Add in the new attribute.
if (WritesMemory && !ReadsMemory)
@@ -318,8 +326,195 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
else
++NumReadNone;
}
+}
+
+// Compute definitive function attributes for a function taking into account
+// prevailing definitions and linkage types
+static FunctionSummary *calculatePrevailingSummary(
+ ValueInfo VI,
+ DenseMap<ValueInfo, FunctionSummary *> &CachedPrevailingSummary,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ IsPrevailing) {
+
+ if (CachedPrevailingSummary.count(VI))
+ return CachedPrevailingSummary[VI];
+
+ /// At this point, prevailing symbols have been resolved. The following leads
+ /// to returning a conservative result:
+ /// - Multiple instances with local linkage. Normally local linkage would be
+ /// unique per module as the GUID includes the module path. We could have a
+ /// guid alias if there wasn't any distinguishing path when each file was
+ /// compiled, but that should be rare so we'll punt on those.
+
+ /// These next 2 cases should not happen and will assert:
+ /// - Multiple instances with external linkage. This should be caught in
+ /// symbol resolution
+ /// - Non-existent FunctionSummary for Aliasee. This presents a hole in our
+ /// knowledge meaning we have to go conservative.
+
+ /// Otherwise, we calculate attributes for a function as:
+ /// 1. If we have a local linkage, take its attributes. If there's somehow
+ /// multiple, bail and go conservative.
+ /// 2. If we have an external/WeakODR/LinkOnceODR linkage check that it is
+ /// prevailing, take its attributes.
+ /// 3. If we have a Weak/LinkOnce linkage the copies can have semantic
+ /// differences. However, if the prevailing copy is known it will be used
+ /// so take its attributes. If the prevailing copy is in a native file
+ /// all IR copies will be dead and propagation will go conservative.
+ /// 4. AvailableExternally summaries without a prevailing copy are known to
+ /// occur in a couple of circumstances:
+ /// a. An internal function gets imported due to its caller getting
+ /// imported; it becomes AvailableExternally but no prevailing
+ /// definition exists. Because it has to get imported along with its
+ /// caller the attributes will be captured by propagating on its
+ /// caller.
+ /// b. C++11 [temp.explicit]p10 can generate AvailableExternally
+ /// definitions of explicitly instantiated template declarations
+ /// for inlining which are ultimately dropped from the TU. Since this
+ /// is localized to the TU the attributes will have already made it to
+ /// the callers.
+ /// These are edge cases and already captured by their callers so we
+ /// ignore these for now. If they become relevant to optimize in the
+ /// future this can be revisited.
+ /// 5. Otherwise, go conservative.
+
+ CachedPrevailingSummary[VI] = nullptr;
+ FunctionSummary *Local = nullptr;
+ FunctionSummary *Prevailing = nullptr;
+
+ for (const auto &GVS : VI.getSummaryList()) {
+ if (!GVS->isLive())
+ continue;
+
+ FunctionSummary *FS = dyn_cast<FunctionSummary>(GVS->getBaseObject());
+ // Virtual and Unknown (e.g. indirect) calls require going conservative
+ if (!FS || FS->fflags().HasUnknownCall)
+ return nullptr;
+
+ const auto &Linkage = GVS->linkage();
+ if (GlobalValue::isLocalLinkage(Linkage)) {
+ if (Local) {
+ LLVM_DEBUG(
+ dbgs()
+ << "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on "
+ "function "
+ << VI.name() << " from " << FS->modulePath() << ". Previous module "
+ << Local->modulePath() << "\n");
+ return nullptr;
+ }
+ Local = FS;
+ } else if (GlobalValue::isExternalLinkage(Linkage)) {
+ assert(IsPrevailing(VI.getGUID(), GVS.get()));
+ Prevailing = FS;
+ break;
+ } else if (GlobalValue::isWeakODRLinkage(Linkage) ||
+ GlobalValue::isLinkOnceODRLinkage(Linkage) ||
+ GlobalValue::isWeakAnyLinkage(Linkage) ||
+ GlobalValue::isLinkOnceAnyLinkage(Linkage)) {
+ if (IsPrevailing(VI.getGUID(), GVS.get())) {
+ Prevailing = FS;
+ break;
+ }
+ } else if (GlobalValue::isAvailableExternallyLinkage(Linkage)) {
+ // TODO: Handle these cases if they become meaningful
+ continue;
+ }
+ }
+
+ if (Local) {
+ assert(!Prevailing);
+ CachedPrevailingSummary[VI] = Local;
+ } else if (Prevailing) {
+ assert(!Local);
+ CachedPrevailingSummary[VI] = Prevailing;
+ }
- return MadeChange;
+ return CachedPrevailingSummary[VI];
+}
+
+bool llvm::thinLTOPropagateFunctionAttrs(
+ ModuleSummaryIndex &Index,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ IsPrevailing) {
+ // TODO: implement addNoAliasAttrs once
+ // there's more information about the return type in the summary
+ if (DisableThinLTOPropagation)
+ return false;
+
+ DenseMap<ValueInfo, FunctionSummary *> CachedPrevailingSummary;
+ bool Changed = false;
+
+ auto PropagateAttributes = [&](std::vector<ValueInfo> &SCCNodes) {
+ // Assume we can propagate unless we discover otherwise
+ FunctionSummary::FFlags InferredFlags;
+ InferredFlags.NoRecurse = (SCCNodes.size() == 1);
+ InferredFlags.NoUnwind = true;
+
+ for (auto &V : SCCNodes) {
+ FunctionSummary *CallerSummary =
+ calculatePrevailingSummary(V, CachedPrevailingSummary, IsPrevailing);
+
+ // Function summaries can be missing (e.g. for declarations), so bail out.
+ if (!CallerSummary)
+ return;
+
+ if (CallerSummary->fflags().MayThrow)
+ InferredFlags.NoUnwind = false;
+
+ for (const auto &Callee : CallerSummary->calls()) {
+ FunctionSummary *CalleeSummary = calculatePrevailingSummary(
+ Callee.first, CachedPrevailingSummary, IsPrevailing);
+
+ if (!CalleeSummary)
+ return;
+
+ if (!CalleeSummary->fflags().NoRecurse)
+ InferredFlags.NoRecurse = false;
+
+ if (!CalleeSummary->fflags().NoUnwind)
+ InferredFlags.NoUnwind = false;
+
+ if (!InferredFlags.NoUnwind && !InferredFlags.NoRecurse)
+ break;
+ }
+ }
+
+ if (InferredFlags.NoUnwind || InferredFlags.NoRecurse) {
+ Changed = true;
+ for (auto &V : SCCNodes) {
+ if (InferredFlags.NoRecurse) {
+ LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to "
+ << V.name() << "\n");
+ ++NumThinLinkNoRecurse;
+ }
+
+ if (InferredFlags.NoUnwind) {
+ LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to "
+ << V.name() << "\n");
+ ++NumThinLinkNoUnwind;
+ }
+
+ for (auto &S : V.getSummaryList()) {
+ if (auto *FS = dyn_cast<FunctionSummary>(S.get())) {
+ if (InferredFlags.NoRecurse)
+ FS->setNoRecurse();
+
+ if (InferredFlags.NoUnwind)
+ FS->setNoUnwind();
+ }
+ }
+ }
+ }
+ };
+
+ // Call propagation functions on each SCC in the Index
+ for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(&Index); !I.isAtEnd();
+ ++I) {
+ std::vector<ValueInfo> Nodes(*I);
+ PropagateAttributes(Nodes);
+ }
+ return Changed;
}
namespace {
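The inference rule thinLTOPropagateFunctionAttrs applies per summary-index SCC is: start optimistic, then refute. NoRecurse can only survive in a single-node SCC (a multi-node SCC is recursive by construction), and NoUnwind dies as soon as any member may throw or reaches a callee that is not nounwind. A toy model of just that rule, using plain structs rather than the real FunctionSummary API:

#include <vector>

// Hypothetical stand-ins for the summary bits the thin link consults.
struct NodeFlags {
  bool MayThrow = false;        // the function itself may throw
  bool CalleesNoUnwind = true;  // every resolved callee is nounwind
  bool CalleesNoRecurse = true; // every resolved callee is norecurse
};

struct Inferred {
  bool NoRecurse;
  bool NoUnwind;
};

Inferred inferForSCC(const std::vector<NodeFlags> &SCC) {
  Inferred R{/*NoRecurse=*/SCC.size() == 1, /*NoUnwind=*/true};
  for (const NodeFlags &N : SCC) {
    if (N.MayThrow || !N.CalleesNoUnwind)
      R.NoUnwind = false;
    if (!N.CalleesNoRecurse)
      R.NoRecurse = false;
  }
  return R;
}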
@@ -395,7 +590,7 @@ struct ArgumentUsesTracker : public CaptureTracker {
assert(UseIndex < CB->data_operands_size() &&
"Indirect function calls should have been filtered above!");
- if (UseIndex >= CB->getNumArgOperands()) {
+ if (UseIndex >= CB->arg_size()) {
// Data operand, but not an argument operand -- must be a bundle operand
assert(CB->hasOperandBundles() && "Must be!");
@@ -530,7 +725,7 @@ determinePointerReadAttrs(Argument *A,
assert(UseIndex < CB.data_operands_size() &&
"Data operand use expected!");
- bool IsOperandBundleUse = UseIndex >= CB.getNumArgOperands();
+ bool IsOperandBundleUse = UseIndex >= CB.arg_size();
if (UseIndex >= F->arg_size() && !IsOperandBundleUse) {
assert(F->isVarArg() && "More params than args in non-varargs call");
@@ -581,9 +776,8 @@ determinePointerReadAttrs(Argument *A,
}
/// Deduce returned attributes for the SCC.
-static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
- bool Changed = false;
-
+static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
// Check each function in turn, determining if an argument is always returned.
for (Function *F : SCCNodes) {
// We can infer and propagate function attributes only when we know that the
@@ -623,11 +817,9 @@ static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
auto *A = cast<Argument>(RetArg);
A->addAttr(Attribute::Returned);
++NumReturned;
- Changed = true;
+ Changed.insert(F);
}
}
-
- return Changed;
}
/// If a callsite has arguments that are also arguments to the parent function,
@@ -693,9 +885,8 @@ static bool addReadAttr(Argument *A, Attribute::AttrKind R) {
}
/// Deduce nocapture attributes for the SCC.
-static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
- bool Changed = false;
-
+static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
ArgumentGraph AG;
// Check each function in turn, determining which pointer arguments are not
@@ -707,7 +898,8 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (!F->hasExactDefinition())
continue;
- Changed |= addArgumentAttrsFromCallsites(*F);
+ if (addArgumentAttrsFromCallsites(*F))
+ Changed.insert(F);
// Functions that are readonly (or readnone) and nounwind and don't return
// a value can't capture arguments. Don't analyze them.
@@ -718,7 +910,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
- Changed = true;
+ Changed.insert(F);
}
}
continue;
@@ -737,7 +929,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
// If it's trivially not captured, mark it nocapture now.
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
- Changed = true;
+ Changed.insert(F);
} else {
// If it's not trivially captured and not trivially not captured,
// then it must be calling into another function in our SCC. Save
@@ -761,7 +953,8 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
Self.insert(&*A);
Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self);
if (R != Attribute::None)
- Changed = addReadAttr(A, R);
+ if (addReadAttr(A, R))
+ Changed.insert(F);
}
}
}
@@ -785,7 +978,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
Argument *A = ArgumentSCC[0]->Definition;
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
- Changed = true;
+ Changed.insert(A->getParent());
}
continue;
}
@@ -827,7 +1020,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
Argument *A = ArgumentSCC[i]->Definition;
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
- Changed = true;
+ Changed.insert(A->getParent());
}
// We also want to compute readonly/readnone. With a small number of false
@@ -858,12 +1051,11 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (ReadAttr != Attribute::None) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
- Changed = addReadAttr(A, ReadAttr);
+ if (addReadAttr(A, ReadAttr))
+ Changed.insert(A->getParent());
}
}
}
-
- return Changed;
}
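All of these helpers now share one bookkeeping convention: instead of returning a bool, they insert every function they modify into a SmallSet<Function *, 8> &Changed, which the pass later uses for targeted analysis invalidation. The shape of the pattern, with a placeholder attribute rather than a real inference:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// Sketch of the Changed-set convention only; unconditionally adding
// nofree like this is NOT a sound inference.
static void addSomeAttr(ArrayRef<Function *> SCCNodes,
                        SmallSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (F->hasFnAttribute(Attribute::NoFree))
      continue;                      // nothing to do
    F->addFnAttr(Attribute::NoFree); // placeholder "inference"
    Changed.insert(F);               // record exactly who was modified
  }
}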
/// Tests whether a function is "malloc-like".
@@ -934,7 +1126,8 @@ static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
}
/// Deduce noalias attributes for the SCC.
-static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
+static void addNoAliasAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
// Check each function in turn, determining which functions return noalias
// pointers.
for (Function *F : SCCNodes) {
@@ -946,7 +1139,7 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
// definition we'll get at link time is *exactly* the definition we see now.
// For more details, see GlobalValue::mayBeDerefined.
if (!F->hasExactDefinition())
- return false;
+ return;
// We annotate noalias return values, which are only applicable to
// pointer types.
@@ -954,10 +1147,9 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
continue;
if (!isFunctionMallocLike(F, SCCNodes))
- return false;
+ return;
}
- bool MadeChange = false;
for (Function *F : SCCNodes) {
if (F->returnDoesNotAlias() ||
!F->getReturnType()->isPointerTy())
@@ -965,10 +1157,8 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
F->setReturnDoesNotAlias();
++NumNoAlias;
- MadeChange = true;
+ Changed.insert(F);
}
-
- return MadeChange;
}
/// Tests whether this function is known to not return null.
@@ -1044,26 +1234,24 @@ static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
}
/// Deduce nonnull attributes for the SCC.
-static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
+static void addNonNullAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
// Speculative that all functions in the SCC return only nonnull
// pointers. We may refute this as we analyze functions.
bool SCCReturnsNonNull = true;
- bool MadeChange = false;
-
// Check each function in turn, determining which functions return nonnull
// pointers.
for (Function *F : SCCNodes) {
// Already nonnull.
- if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::NonNull))
+ if (F->getAttributes().hasRetAttr(Attribute::NonNull))
continue;
// We can infer and propagate function attributes only when we know that the
// definition we'll get at link time is *exactly* the definition we see now.
// For more details, see GlobalValue::mayBeDerefined.
if (!F->hasExactDefinition())
- return false;
+ return;
// We annotate nonnull return values, which are only applicable to
// pointer types.
@@ -1077,9 +1265,9 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
// which prevents us from speculating about the entire SCC
LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName()
<< " as nonnull\n");
- F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ F->addRetAttr(Attribute::NonNull);
++NumNonNullReturn;
- MadeChange = true;
+ Changed.insert(F);
}
continue;
}
@@ -1090,19 +1278,16 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
if (SCCReturnsNonNull) {
for (Function *F : SCCNodes) {
- if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::NonNull) ||
+ if (F->getAttributes().hasRetAttr(Attribute::NonNull) ||
!F->getReturnType()->isPointerTy())
continue;
LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
- F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ F->addRetAttr(Attribute::NonNull);
++NumNonNullReturn;
- MadeChange = true;
+ Changed.insert(F);
}
}
-
- return MadeChange;
}
namespace {
@@ -1155,12 +1340,13 @@ public:
InferenceDescriptors.push_back(AttrInference);
}
- bool run(const SCCNodeSet &SCCNodes);
+ void run(const SCCNodeSet &SCCNodes, SmallSet<Function *, 8> &Changed);
};
/// Perform all the requested attribute inference actions according to the
/// attribute predicates stored before.
-bool AttributeInferer::run(const SCCNodeSet &SCCNodes) {
+void AttributeInferer::run(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors;
// Go through all the functions in SCC and check corresponding attribute
// assumptions for each of them. Attributes that are invalid for this SCC
@@ -1169,7 +1355,7 @@ bool AttributeInferer::run(const SCCNodeSet &SCCNodes) {
// No attributes whose assumptions are still valid - done.
if (InferInSCC.empty())
- return false;
+ return;
// Check if our attributes ever need scanning/can be scanned.
llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) {
@@ -1212,9 +1398,8 @@ bool AttributeInferer::run(const SCCNodeSet &SCCNodes) {
}
if (InferInSCC.empty())
- return false;
+ return;
- bool Changed = false;
for (Function *F : SCCNodes)
// At this point InferInSCC contains only functions that were either:
// - explicitly skipped from scan/inference, or
@@ -1223,10 +1408,9 @@ bool AttributeInferer::run(const SCCNodeSet &SCCNodes) {
for (auto &ID : InferInSCC) {
if (ID.SkipFunction(*F))
continue;
- Changed = true;
+ Changed.insert(F);
ID.SetAttribute(*F);
}
- return Changed;
}
struct SCCNodesResult {
@@ -1243,7 +1427,7 @@ static bool InstrBreaksNonConvergent(Instruction &I,
// Breaks non-convergent assumption if CS is a convergent call to a function
// not in the SCC.
return CB && CB->isConvergent() &&
- SCCNodes.count(CB->getCalledFunction()) == 0;
+ !SCCNodes.contains(CB->getCalledFunction());
}
/// Helper for NoUnwind inference predicate InstrBreaksAttribute.
@@ -1282,7 +1466,8 @@ static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
/// Attempt to remove convergent function attribute when possible.
///
/// Functions whose attributes changed are added to \p Changed.
-static bool inferConvergent(const SCCNodeSet &SCCNodes) {
+static void inferConvergent(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
AttributeInferer AI;
// Request to remove the convergent attribute from all functions in the SCC
@@ -1305,7 +1490,7 @@ static bool inferConvergent(const SCCNodeSet &SCCNodes) {
},
/* RequiresExactDefinition= */ false});
// Perform all the requested attribute inference actions.
- return AI.run(SCCNodes);
+ AI.run(SCCNodes, Changed);
}
/// Infer attributes from all functions in the SCC by scanning every
@@ -1314,7 +1499,8 @@ static bool inferConvergent(const SCCNodeSet &SCCNodes) {
/// - addition of NoUnwind attribute
///
/// Functions whose attributes changed are added to \p Changed.
-static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
+static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
AttributeInferer AI;
if (!DisableNoUnwindInference)
@@ -1363,19 +1549,20 @@ static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
/* RequiresExactDefinition= */ true});
// Perform all the requested attribute inference actions.
- return AI.run(SCCNodes);
+ AI.run(SCCNodes, Changed);
}
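The AttributeInferer registrations above all follow one scheme: each InferenceDescriptor carries a per-instruction "breaks the assumption" predicate and a setter, the SCC is scanned to refute assumptions, and whatever survives is applied. A compact restatement of that flow with toy types (not the real InferenceDescriptor):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include <functional>
#include <vector>
using namespace llvm;

struct Rule {
  std::function<bool(Instruction &)> Breaks; // refutes the assumption
  std::function<void(Function &)> Apply;     // sets the attribute
};

static void runRules(Function &F, std::vector<Rule> Rules) {
  // Drop every rule that some instruction in F refutes...
  llvm::erase_if(Rules, [&](const Rule &R) {
    return any_of(instructions(F),
                  [&](Instruction &I) { return R.Breaks(I); });
  });
  // ...and apply the survivors.
  for (const Rule &R : Rules)
    R.Apply(F);
}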
-static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
+static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
// Try and identify functions that do not recurse.
// If the SCC contains multiple nodes we know for sure there is recursion.
if (SCCNodes.size() != 1)
- return false;
+ return;
Function *F = *SCCNodes.begin();
if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
- return false;
+ return;
// If all of the calls in F are identifiable and are to norecurse functions, F
// is norecurse. This check also detects self-recursion as F is not currently
@@ -1386,7 +1573,7 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
Function *Callee = CB->getCalledFunction();
if (!Callee || Callee == F || !Callee->doesNotRecurse())
// Function calls a potentially recursive function.
- return false;
+ return;
}
// Every call was to a non-recursive function other than this function, and
@@ -1394,7 +1581,7 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
// recurse.
F->setDoesNotRecurse();
++NumNoRecurse;
- return true;
+ Changed.insert(F);
}
static bool instructionDoesNotReturn(Instruction &I) {
@@ -1412,9 +1599,8 @@ static bool basicBlockCanReturn(BasicBlock &BB) {
}
// Set the noreturn function attribute if possible.
-static bool addNoReturnAttrs(const SCCNodeSet &SCCNodes) {
- bool Changed = false;
-
+static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
for (Function *F : SCCNodes) {
if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
F->doesNotReturn())
@@ -1424,11 +1610,9 @@ static bool addNoReturnAttrs(const SCCNodeSet &SCCNodes) {
// FIXME: this doesn't handle recursion or unreachable blocks.
if (none_of(*F, basicBlockCanReturn)) {
F->setDoesNotReturn();
- Changed = true;
+ Changed.insert(F);
}
}
-
- return Changed;
}
static bool functionWillReturn(const Function &F) {
@@ -1461,19 +1645,16 @@ static bool functionWillReturn(const Function &F) {
}
// Set the willreturn function attribute if possible.
-static bool addWillReturn(const SCCNodeSet &SCCNodes) {
- bool Changed = false;
-
+static void addWillReturn(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
for (Function *F : SCCNodes) {
if (!F || F->willReturn() || !functionWillReturn(*F))
continue;
F->setWillReturn();
NumWillReturn++;
- Changed = true;
+ Changed.insert(F);
}
-
- return Changed;
}
// Return true if this is an atomic which has an ordering stronger than
@@ -1532,7 +1713,8 @@ static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
}
// Infer the nosync attribute.
-static bool addNoSyncAttr(const SCCNodeSet &SCCNodes) {
+static void addNoSyncAttr(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
AttributeInferer AI;
AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
Attribute::NoSync,
@@ -1549,14 +1731,15 @@ static bool addNoSyncAttr(const SCCNodeSet &SCCNodes) {
++NumNoSync;
},
/* RequiresExactDefinition= */ true});
- return AI.run(SCCNodes);
+ AI.run(SCCNodes, Changed);
}
static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
SCCNodesResult Res;
Res.HasUnknownCall = false;
for (Function *F : Functions) {
- if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked)) {
+ if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked) ||
+ F->isPresplitCoroutine()) {
// Treat any function we're trying not to optimize as if it were an
// indirect call and omit it from the node set used below.
Res.HasUnknownCall = true;
@@ -1582,32 +1765,33 @@ static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
}
template <typename AARGetterT>
-static bool deriveAttrsInPostOrder(ArrayRef<Function *> Functions,
- AARGetterT &&AARGetter) {
+static SmallSet<Function *, 8>
+deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter) {
SCCNodesResult Nodes = createSCCNodeSet(Functions);
- bool Changed = false;
// Bail if the SCC only contains optnone functions.
if (Nodes.SCCNodes.empty())
- return Changed;
+ return {};
+
+ SmallSet<Function *, 8> Changed;
- Changed |= addArgumentReturnedAttrs(Nodes.SCCNodes);
- Changed |= addReadAttrs(Nodes.SCCNodes, AARGetter);
- Changed |= addArgumentAttrs(Nodes.SCCNodes);
- Changed |= inferConvergent(Nodes.SCCNodes);
- Changed |= addNoReturnAttrs(Nodes.SCCNodes);
- Changed |= addWillReturn(Nodes.SCCNodes);
+ addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
+ addReadAttrs(Nodes.SCCNodes, AARGetter, Changed);
+ addArgumentAttrs(Nodes.SCCNodes, Changed);
+ inferConvergent(Nodes.SCCNodes, Changed);
+ addNoReturnAttrs(Nodes.SCCNodes, Changed);
+ addWillReturn(Nodes.SCCNodes, Changed);
// If we have no external nodes participating in the SCC, we can deduce some
// more precise attributes as well.
if (!Nodes.HasUnknownCall) {
- Changed |= addNoAliasAttrs(Nodes.SCCNodes);
- Changed |= addNonNullAttrs(Nodes.SCCNodes);
- Changed |= inferAttrsFromFunctionBodies(Nodes.SCCNodes);
- Changed |= addNoRecurseAttrs(Nodes.SCCNodes);
+ addNoAliasAttrs(Nodes.SCCNodes, Changed);
+ addNonNullAttrs(Nodes.SCCNodes, Changed);
+ inferAttrsFromFunctionBodies(Nodes.SCCNodes, Changed);
+ addNoRecurseAttrs(Nodes.SCCNodes, Changed);
}
- Changed |= addNoSyncAttr(Nodes.SCCNodes);
+ addNoSyncAttr(Nodes.SCCNodes, Changed);
// Finally, infer the maximal set of attributes from the ones we've inferred
// above. This is handling the cases where one attribute on a signature
@@ -1615,7 +1799,8 @@ static bool deriveAttrsInPostOrder(ArrayRef<Function *> Functions,
// the latter is missing (or simply less sophisticated).
for (Function *F : Nodes.SCCNodes)
if (F)
- Changed |= inferAttributesFromOthers(*F);
+ if (inferAttributesFromOthers(*F))
+ Changed.insert(F);
return Changed;
}
@@ -1638,14 +1823,35 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
Functions.push_back(&N.getFunction());
}
- if (deriveAttrsInPostOrder(Functions, AARGetter)) {
- // We have not changed the call graph or removed/added functions.
- PreservedAnalyses PA;
- PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
- return PA;
+ auto ChangedFunctions = deriveAttrsInPostOrder(Functions, AARGetter);
+ if (ChangedFunctions.empty())
+ return PreservedAnalyses::all();
+
+ // Invalidate analyses for modified functions so that we don't have to
+ // invalidate all analyses for all functions in this SCC.
+ PreservedAnalyses FuncPA;
+ // We haven't changed the CFG for modified functions.
+ FuncPA.preserveSet<CFGAnalyses>();
+ for (Function *Changed : ChangedFunctions) {
+ FAM.invalidate(*Changed, FuncPA);
+ // Also invalidate any direct callers of changed functions since analyses
+ // may care about attributes of direct callees. For example, MemorySSA cares
+ // about whether or not a call's callee modifies memory and queries that
+ // through function attributes.
+ for (auto *U : Changed->users()) {
+ if (auto *Call = dyn_cast<CallBase>(U)) {
+ if (Call->getCalledFunction() == Changed)
+ FAM.invalidate(*Call->getFunction(), FuncPA);
+ }
+ }
}
- return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ // We have not added or removed functions.
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ // We already invalidated all relevant function analyses above.
+ PA.preserveSet<AllAnalysesOn<Function>>();
+ return PA;
}
namespace {
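The run() rewrite above trades blanket preservation for targeted invalidation: only functions whose attributes actually changed, plus their direct callers (for the sake of analyses such as MemorySSA that query callee attributes), get their analyses flushed, and even then CFG-only analyses survive. The core idiom, assuming FAM is the inner FunctionAnalysisManager:

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

static void invalidateAfterAttrChange(Function &F,
                                      FunctionAnalysisManager &FAM) {
  PreservedAnalyses FuncPA;
  FuncPA.preserveSet<CFGAnalyses>(); // attribute edits leave the CFG alone
  FAM.invalidate(F, FuncPA);         // drop everything else for F
}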
@@ -1690,7 +1896,7 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
Functions.push_back(I->getFunction());
}
- return deriveAttrsInPostOrder(Functions, AARGetter);
+ return !deriveAttrsInPostOrder(Functions, AARGetter).empty();
}
bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 2f6cf0ca7087..d9b43109f629 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -39,6 +39,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
@@ -187,23 +188,6 @@ selectCallee(const ModuleSummaryIndex &Index,
return false;
}
- // For SamplePGO, in computeImportForFunction the OriginalId
- // may have been used to locate the callee summary list (See
- // comment there).
- // The mapping from OriginalId to GUID may return a GUID
- // that corresponds to a static variable. Filter it out here.
- // This can happen when
- // 1) There is a call to a library function which is not defined
- // in the index.
- // 2) There is a static variable with the OriginalGUID identical
- // to the GUID of the library function in 1);
- // When this happens, the logic for SamplePGO kicks in and
- // the static variable in 2) will be found, which needs to be
- // filtered out.
- if (GVSummary->getSummaryKind() == GlobalValueSummary::GlobalVarKind) {
- Reason = FunctionImporter::ImportFailureReason::GlobalVar;
- return false;
- }
if (GlobalValue::isInterposableLinkage(GVSummary->linkage())) {
Reason = FunctionImporter::ImportFailureReason::InterposableLinkage;
// There is no point in importing these, we can't inline them
@@ -264,21 +248,6 @@ using EdgeInfo =
} // anonymous namespace
-static ValueInfo
-updateValueInfoForIndirectCalls(const ModuleSummaryIndex &Index, ValueInfo VI) {
- if (!VI.getSummaryList().empty())
- return VI;
- // For SamplePGO, the indirect call targets for local functions will
- // have its original name annotated in profile. We try to find the
- // corresponding PGOFuncName as the GUID.
- // FIXME: Consider updating the edges in the graph after building
- // it, rather than needing to perform this mapping on each walk.
- auto GUID = Index.getGUIDFromOriginalID(VI.getGUID());
- if (GUID == 0)
- return ValueInfo();
- return Index.getValueInfo(GUID);
-}
-
static bool shouldImportGlobal(const ValueInfo &VI,
const GVSummaryMapTy &DefinedGVSummaries) {
const auto &GVS = DefinedGVSummaries.find(VI.getGUID());
@@ -400,10 +369,6 @@ static void computeImportForFunction(
continue;
}
- VI = updateValueInfoForIndirectCalls(Index, VI);
- if (!VI)
- continue;
-
if (DefinedGVSummaries.count(VI.getGUID())) {
// FIXME: Consider not skipping import if the module contains
// a non-prevailing def with interposable linkage. The prevailing copy
@@ -496,7 +461,7 @@ static void computeImportForFunction(
VI.name().str() + " due to " +
getFailureName(Reason);
auto Error = make_error<StringError>(
- Msg, std::make_error_code(std::errc::operation_not_supported));
+ Msg, make_error_code(errc::not_supported));
logAllUnhandledErrors(std::move(Error), errs(),
"Error importing module: ");
break;
@@ -839,16 +804,61 @@ void llvm::ComputeCrossModuleImportForModuleFromIndex(
#endif
}
-void llvm::computeDeadSymbols(
+// For SamplePGO, the indirect call targets for local functions will
+// have their original names annotated in the profile. We try to find the
+// corresponding PGOFuncName as the GUID, and fix up the edges
+// accordingly.
+void updateValueInfoForIndirectCalls(ModuleSummaryIndex &Index,
+ FunctionSummary *FS) {
+ for (auto &EI : FS->mutableCalls()) {
+ if (!EI.first.getSummaryList().empty())
+ continue;
+ auto GUID = Index.getGUIDFromOriginalID(EI.first.getGUID());
+ if (GUID == 0)
+ continue;
+ // Update the edge to point directly to the correct GUID.
+ auto VI = Index.getValueInfo(GUID);
+ if (llvm::any_of(
+ VI.getSummaryList(),
+ [&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
+ // The mapping from OriginalId to GUID may return a GUID
+ // that corresponds to a static variable. Filter it out here.
+ // This can happen when
+ // 1) There is a call to a library function which is not defined
+ // in the index.
+ // 2) There is a static variable with the OriginalGUID identical
+ // to the GUID of the library function in 1);
+ // When this happens the static variable in 2) will be found,
+ // which needs to be filtered out.
+ return SummaryPtr->getSummaryKind() ==
+ GlobalValueSummary::GlobalVarKind;
+ }))
+ continue;
+ EI.first = VI;
+ }
+}
+
+void llvm::updateIndirectCalls(ModuleSummaryIndex &Index) {
+ for (const auto &Entry : Index) {
+ for (auto &S : Entry.second.SummaryList) {
+ if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
+ updateValueInfoForIndirectCalls(Index, FS);
+ }
+ }
+}
+
+void llvm::computeDeadSymbolsAndUpdateIndirectCalls(
ModuleSummaryIndex &Index,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing) {
assert(!Index.withGlobalValueDeadStripping());
- if (!ComputeDead)
- return;
- if (GUIDPreservedSymbols.empty())
- // Don't do anything when nothing is live, this is friendly with tests.
+ if (!ComputeDead ||
+ // Don't do anything when nothing is live; this is friendly with tests.
+ GUIDPreservedSymbols.empty()) {
+ // Still need to update indirect calls.
+ updateIndirectCalls(Index);
return;
+ }
unsigned LiveSymbols = 0;
SmallVector<ValueInfo, 128> Worklist;
Worklist.reserve(GUIDPreservedSymbols.size() * 2);
@@ -863,13 +873,16 @@ void llvm::computeDeadSymbols(
// Add values flagged in the index as live roots to the worklist.
for (const auto &Entry : Index) {
auto VI = Index.getValueInfo(Entry);
- for (auto &S : Entry.second.SummaryList)
+ for (auto &S : Entry.second.SummaryList) {
+ if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
+ updateValueInfoForIndirectCalls(Index, FS);
if (S->isLive()) {
LLVM_DEBUG(dbgs() << "Live root: " << VI << "\n");
Worklist.push_back(VI);
++LiveSymbols;
break;
}
+ }
}
// Make value live and add it to the worklist if it was not live before.
@@ -882,9 +895,6 @@ void llvm::computeDeadSymbols(
// binary, which increases the binary size unnecessarily. Note that
// if this code changes, the importer needs to change so that edges
// to functions marked dead are skipped.
- VI = updateValueInfoForIndirectCalls(Index, VI);
- if (!VI)
- return;
if (llvm::any_of(VI.getSummaryList(),
[](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
@@ -958,7 +968,8 @@ void llvm::computeDeadSymbolsWithConstProp(
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
bool ImportEnabled) {
- computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing);
+ computeDeadSymbolsAndUpdateIndirectCalls(Index, GUIDPreservedSymbols,
+ isPrevailing);
if (ImportEnabled)
Index.propagateAttributes(GUIDPreservedSymbols);
}
@@ -1040,13 +1051,33 @@ bool llvm::convertToDeclaration(GlobalValue &GV) {
return true;
}
-void llvm::thinLTOResolvePrevailingInModule(
- Module &TheModule, const GVSummaryMapTy &DefinedGlobals) {
- auto updateLinkage = [&](GlobalValue &GV) {
+void llvm::thinLTOFinalizeInModule(Module &TheModule,
+ const GVSummaryMapTy &DefinedGlobals,
+ bool PropagateAttrs) {
+ auto FinalizeInModule = [&](GlobalValue &GV, bool Propagate = false) {
// See if the global summary analysis computed a new resolved linkage.
const auto &GS = DefinedGlobals.find(GV.getGUID());
if (GS == DefinedGlobals.end())
return;
+
+ if (Propagate)
+ if (FunctionSummary *FS = dyn_cast<FunctionSummary>(GS->second)) {
+ if (Function *F = dyn_cast<Function>(&GV)) {
+ // TODO: propagate ReadNone and ReadOnly.
+ if (FS->fflags().ReadNone && !F->doesNotAccessMemory())
+ F->setDoesNotAccessMemory();
+
+ if (FS->fflags().ReadOnly && !F->onlyReadsMemory())
+ F->setOnlyReadsMemory();
+
+ if (FS->fflags().NoRecurse && !F->doesNotRecurse())
+ F->setDoesNotRecurse();
+
+ if (FS->fflags().NoUnwind && !F->doesNotThrow())
+ F->setDoesNotThrow();
+ }
+ }
+
auto NewLinkage = GS->second->linkage();
if (GlobalValue::isLocalLinkage(GV.getLinkage()) ||
// Don't internalize anything here, because the code below
@@ -1105,11 +1136,11 @@ void llvm::thinLTOResolvePrevailingInModule(
// Process functions and global now
for (auto &GV : TheModule)
- updateLinkage(GV);
+ FinalizeInModule(GV, PropagateAttrs);
for (auto &GV : TheModule.globals())
- updateLinkage(GV);
+ FinalizeInModule(GV);
for (auto &GV : TheModule.aliases())
- updateLinkage(GV);
+ FinalizeInModule(GV);
}
/// Run internalization on \p TheModule based on summary analysis.
@@ -1153,7 +1184,7 @@ void llvm::thinLTOInternalizeModule(Module &TheModule,
/// Make alias a clone of its aliasee.
static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
- Function *Fn = cast<Function>(GA->getBaseObject());
+ Function *Fn = cast<Function>(GA->getAliaseeObject());
ValueToValueMapTy VMap;
Function *NewFn = CloneFunction(Fn, VMap);
@@ -1259,12 +1290,12 @@ Expected<bool> FunctionImporter::importFunctions(
if (Error Err = GA.materialize())
return std::move(Err);
// Import alias as a copy of its aliasee.
- GlobalObject *Base = GA.getBaseObject();
- if (Error Err = Base->materialize())
+ GlobalObject *GO = GA.getAliaseeObject();
+ if (Error Err = GO->materialize())
return std::move(Err);
auto *Fn = replaceAliasWithAliasee(SrcModule.get(), &GA);
- LLVM_DEBUG(dbgs() << "Is importing aliasee fn " << Base->getGUID()
- << " " << Base->getName() << " from "
+ LLVM_DEBUG(dbgs() << "Is importing aliasee fn " << GO->getGUID() << " "
+ << GO->getName() << " from "
<< SrcModule->getSourceFileName() << "\n");
if (EnableImportMetadata) {
// Add 'thinlto_src_module' metadata for statistics and debugging.
@@ -1303,7 +1334,7 @@ Expected<bool> FunctionImporter::importFunctions(
std::move(SrcModule), GlobalsToImport.getArrayRef(),
[](GlobalValue &, IRMover::ValueAdder) {},
/*IsPerformingImport=*/true))
- report_fatal_error("Function Import: link error: " +
+ report_fatal_error(Twine("Function Import: link error: ") +
toString(std::move(Err)));
ImportedCount += GlobalsToImport.size();
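The SamplePGO fixup moved here out of the import walk: rather than re-resolving OriginalID to GUID on every traversal, the index's call edges are now rewritten once, up front. The essence of that rewrite (a simplified sketch; the real updateValueInfoForIndirectCalls above additionally filters out static-variable GUID collisions):

#include "llvm/IR/ModuleSummaryIndex.h"
using namespace llvm;

static void fixupIndirectEdge(ModuleSummaryIndex &Index, ValueInfo &Callee) {
  if (!Callee.getSummaryList().empty())
    return;                      // edge already resolves to a summary
  GlobalValue::GUID G = Index.getGUIDFromOriginalID(Callee.getGUID());
  if (G == 0)
    return;                      // no original-ID mapping recorded
  if (ValueInfo VI = Index.getValueInfo(G))
    Callee = VI;                 // re-point the edge at the real target
}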
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index f61f4312b777..fbd083bb9bbf 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -11,7 +11,6 @@
// are propagated to the callee by specializing the function.
//
// Current limitations:
-// - It does not handle specialization of recursive functions,
// - It does not yet handle integer ranges.
// - Only 1 argument per function is specialised,
// - The cost-model could be further looked into,
@@ -22,6 +21,18 @@
// a direct way to steer function specialization, avoiding the cost-model,
// and thus control compile-times / code-size.
//
+// Todos:
+// - Specializing recursive functions relies on running the transformation a
+// number of times, which is controlled by the option
+// `func-specialization-max-iters`. Thus, increasing this iteration count
+// linearly increases the number of times recursive functions get
+// specialized; see also the discussion in https://reviews.llvm.org/D106426
+// for details. Perhaps there is a more compile-time friendly way to
+// control/limit the number of specialisations for recursive functions.
+// - Don't transform the function if no specialization happens.
+//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
@@ -59,20 +70,166 @@ static cl::opt<unsigned> MaxConstantsThreshold(
"specialization"),
cl::init(3));
+static cl::opt<unsigned> SmallFunctionThreshold(
+ "func-specialization-size-threshold", cl::Hidden,
+ cl::desc("Don't specialize functions that have less than this theshold "
+ "number of instructions"),
+ cl::init(100));
+
static cl::opt<unsigned>
AvgLoopIterationCount("func-specialization-avg-iters-cost", cl::Hidden,
cl::desc("Average loop iteration count cost"),
cl::init(10));
+static cl::opt<bool> SpecializeOnAddresses(
+ "func-specialization-on-address", cl::init(false), cl::Hidden,
+ cl::desc("Enable function specialization on the address of global values"));
+
+// TODO: This needs checking to see the impact on compile-times, which is why
+// this is off by default for now.
static cl::opt<bool> EnableSpecializationForLiteralConstant(
"function-specialization-for-literal-constant", cl::init(false), cl::Hidden,
- cl::desc("Make function specialization available for literal constant."));
+ cl::desc("Enable specialization of functions that take a literal constant "
+ "as an argument."));
+
+// Helper to check if \p LV is either a constant or a constant
+// range with a single element. This should cover exactly the same cases as the
+// old ValueLatticeElement::isConstant() and is intended to be used in the
+// transition to ValueLatticeElement.
+static bool isConstant(const ValueLatticeElement &LV) {
+ return LV.isConstant() ||
+ (LV.isConstantRange() && LV.getConstantRange().isSingleElement());
+}
// Helper to check if \p LV is either overdefined or a constant int.
static bool isOverdefined(const ValueLatticeElement &LV) {
- return !LV.isUnknownOrUndef() && !LV.isConstant();
+ return !LV.isUnknownOrUndef() && !isConstant(LV);
+}
+
+static Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call) {
+ Value *StoreValue = nullptr;
+ for (auto *User : Alloca->users()) {
+ // We can't use llvm::isAllocaPromotable() as that would fail because of
+ // the usage in the CallInst, which is what we check here.
+ if (User == Call)
+ continue;
+ if (auto *Bitcast = dyn_cast<BitCastInst>(User)) {
+ if (!Bitcast->hasOneUse() || *Bitcast->user_begin() != Call)
+ return nullptr;
+ continue;
+ }
+
+ if (auto *Store = dyn_cast<StoreInst>(User)) {
+ // This is a duplicate store, bail out.
+ if (StoreValue || Store->isVolatile())
+ return nullptr;
+ StoreValue = Store->getValueOperand();
+ continue;
+ }
+ // Bail if there is any other unknown usage.
+ return nullptr;
+ }
+ return dyn_cast_or_null<Constant>(StoreValue);
}
+// A constant stack value is an AllocaInst that has a single constant
+// value stored to it. Return this constant if such an alloca stack value
+// is a function argument.
+static Constant *getConstantStackValue(CallInst *Call, Value *Val,
+ SCCPSolver &Solver) {
+ if (!Val)
+ return nullptr;
+ Val = Val->stripPointerCasts();
+ if (auto *ConstVal = dyn_cast<ConstantInt>(Val))
+ return ConstVal;
+ auto *Alloca = dyn_cast<AllocaInst>(Val);
+ if (!Alloca || !Alloca->getAllocatedType()->isIntegerTy())
+ return nullptr;
+ return getPromotableAlloca(Alloca, Call);
+}
+
+// To support specializing recursive functions, it is important to propagate
+// constant arguments because after a first iteration of specialisation, a
+// reduced example may look like this:
+//
+// define internal void @RecursiveFn(i32* arg1) {
+// %temp = alloca i32, align 4
+// store i32 2, i32* %temp, align 4
+// call void @RecursiveFn.1(i32* nonnull %temp)
+// ret void
+// }
+//
+// Before the next iteration, we need to propagate the constant like so,
+// which allows further specialization in later iterations.
+//
+// @funcspec.arg = internal constant i32 2
+//
+// define internal void @RecursiveFn(i32* arg1) {
+// call void @RecursiveFn.1(i32* nonnull @funcspec.arg)
+// ret void
+// }
+//
+static void constantArgPropagation(SmallVectorImpl<Function *> &WorkList,
+ Module &M, SCCPSolver &Solver) {
+ // Iterate over the argument-tracked functions to see if there
+ // are any new constant values for the call instruction via
+ // stack variables.
+ for (auto *F : WorkList) {
+ // TODO: Generalize for any read only arguments.
+ if (F->arg_size() != 1)
+ continue;
+
+ auto &Arg = *F->arg_begin();
+ if (!Arg.onlyReadsMemory() || !Arg.getType()->isPointerTy())
+ continue;
+
+ for (auto *User : F->users()) {
+ auto *Call = dyn_cast<CallInst>(User);
+ if (!Call)
+ break;
+ auto *ArgOp = Call->getArgOperand(0);
+ auto *ArgOpType = ArgOp->getType();
+ auto *ConstVal = getConstantStackValue(Call, ArgOp, Solver);
+ if (!ConstVal)
+ break;
+
+ Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
+ GlobalValue::InternalLinkage, ConstVal,
+ "funcspec.arg");
+
+ if (ArgOpType != ConstVal->getType())
+ GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOp->getType());
+
+ Call->setArgOperand(0, GV);
+
+ // Add the changed CallInst to Solver Worklist
+ Solver.visitCall(*Call);
+ }
+ }
+}
+
+// ssa_copy intrinsics are introduced by the SCCP solver. These intrinsics
+// interfere with the constantArgPropagation optimization.
+static void removeSSACopy(Function &F) {
+ for (BasicBlock &BB : F) {
+ for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
+ auto *II = dyn_cast<IntrinsicInst>(&Inst);
+ if (!II)
+ continue;
+ if (II->getIntrinsicID() != Intrinsic::ssa_copy)
+ continue;
+ Inst.replaceAllUsesWith(II->getOperand(0));
+ Inst.eraseFromParent();
+ }
+ }
+}
+
+static void removeSSACopy(Module &M) {
+ for (Function &F : M)
+ removeSSACopy(F);
+}
+
+namespace {
class FunctionSpecializer {
/// The IPSCCP Solver.
@@ -115,9 +272,14 @@ public:
for (auto *SpecializedFunc : CurrentSpecializations) {
SpecializedFuncs.insert(SpecializedFunc);
- // TODO: If we want to support specializing specialized functions,
- // initialize here the state of the newly created functions, marking
- // them argument-tracked and executable.
+ // Initialize the state of the newly created functions, marking them
+ // argument-tracked and executable.
+ if (SpecializedFunc->hasExactDefinition() &&
+ !SpecializedFunc->hasFnAttribute(Attribute::Naked))
+ Solver.addTrackedFunction(SpecializedFunc);
+ Solver.addArgumentTrackedFunction(SpecializedFunc);
+ FuncDecls.push_back(SpecializedFunc);
+ Solver.markBlockExecutable(&SpecializedFunc->front());
// Replace the function arguments for the specialized functions.
for (Argument &Arg : SpecializedFunc->args())
@@ -138,12 +300,22 @@ public:
const ValueLatticeElement &IV = Solver.getLatticeValueFor(V);
if (isOverdefined(IV))
return false;
- auto *Const = IV.isConstant() ? Solver.getConstant(IV)
- : UndefValue::get(V->getType());
+ auto *Const =
+ isConstant(IV) ? Solver.getConstant(IV) : UndefValue::get(V->getType());
V->replaceAllUsesWith(Const);
- // TODO: Update the solver here if we want to specialize specialized
- // functions.
+ for (auto *U : Const->users())
+ if (auto *I = dyn_cast<Instruction>(U))
+ if (Solver.isBlockExecutable(I->getParent()))
+ Solver.visit(I);
+
+ // Remove the instruction from Block and Solver.
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (I->isSafeToRemove()) {
+ I->eraseFromParent();
+ Solver.removeLatticeValueFor(I);
+ }
+ }
return true;
}
@@ -152,6 +324,15 @@ private:
// also in the cost model.
unsigned NbFunctionsSpecialized = 0;
+ /// Clone the function \p F and remove the ssa_copy intrinsics added by
+ /// the SCCPSolver in the cloned version.
+ Function *cloneCandidateFunction(Function *F) {
+ ValueToValueMapTy EmptyMap;
+ Function *Clone = CloneFunction(F, EmptyMap);
+ removeSSACopy(*Clone);
+ return Clone;
+ }
+
/// This function decides whether to specialize function \p F based on the
/// known constant values its arguments can take on. Specialization is
/// performed on the first interesting argument. Specializations based on
@@ -162,9 +343,8 @@ private:
SmallVectorImpl<Function *> &Specializations) {
// Do not specialize the cloned function again.
- if (SpecializedFuncs.contains(F)) {
+ if (SpecializedFuncs.contains(F))
return false;
- }
// If we're optimizing the function for size, we shouldn't specialize it.
if (F->hasOptSize() ||
@@ -176,8 +356,25 @@ private:
if (!Solver.isBlockExecutable(&F->getEntryBlock()))
return false;
+  // It is a waste of time to specialize a function which will ultimately be
+  // inlined anyway.
+ if (F->hasFnAttribute(Attribute::AlwaysInline))
+ return false;
+
LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName()
<< "\n");
+
+ // Determine if it would be profitable to create a specialization of the
+ // function where the argument takes on the given constant value. If so,
+ // add the constant to Constants.
+ auto FnSpecCost = getSpecializationCost(F);
+ if (!FnSpecCost.isValid()) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialisation cost.\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: func specialisation cost: ";
+ FnSpecCost.print(dbgs()); dbgs() << "\n");
+
// Determine if we should specialize the function based on the values the
// argument can take on. If specialization is not profitable, we continue
// on to the next argument.
@@ -195,7 +392,7 @@ private:
// be set to false by isArgumentInteresting (that function only adds
// values to the Constants list that are deemed profitable).
SmallVector<Constant *, 4> Constants;
- if (!isArgumentInteresting(&A, Constants, IsPartial)) {
+ if (!isArgumentInteresting(&A, Constants, FnSpecCost, IsPartial)) {
LLVM_DEBUG(dbgs() << "FnSpecialization: Argument is not interesting\n");
continue;
}
@@ -214,8 +411,7 @@ private:
for (auto *C : Constants) {
// Clone the function. We leave the ValueToValueMap empty to allow
// IPSCCP to propagate the constant arguments.
- ValueToValueMapTy EmptyMap;
- Function *Clone = CloneFunction(F, EmptyMap);
+ Function *Clone = cloneCandidateFunction(F);
Argument *ClonedArg = Clone->arg_begin() + A.getArgNo();
// Rewrite calls to the function so that they call the clone instead.
@@ -231,9 +427,10 @@ private:
NbFunctionsSpecialized++;
}
- // TODO: if we want to support specialize specialized functions, and if
- // the function has been completely specialized, the original function is
- // no longer needed, so we would need to mark it unreachable here.
+ // If the function has been completely specialized, the original function
+ // is no longer needed. Mark it unreachable.
+ if (!IsPartial)
+ Solver.markFunctionUnreachable(F);
// FIXME: Only one argument per function.
return true;
@@ -253,7 +450,11 @@ private:
// If the code metrics reveal that we shouldn't duplicate the function, we
// shouldn't specialize it. Set the specialization cost to Invalid.
- if (Metrics.notDuplicatable) {
+  // Likewise, if the instruction count implies that this function is likely
+  // to be inlined anyway, we shouldn't specialize it.
+ if (Metrics.notDuplicatable ||
+ (!ForceFunctionSpecialization &&
+ Metrics.NumInsts < SmallFunctionThreshold)) {
InstructionCost C{};
C.setInvalid();
return C;
@@ -379,9 +580,8 @@ private:
/// argument.
bool isArgumentInteresting(Argument *A,
SmallVectorImpl<Constant *> &Constants,
+ const InstructionCost &FnSpecCost,
bool &IsPartial) {
- Function *F = A->getParent();
-
// For now, don't attempt to specialize functions based on the values of
// composite types.
if (!A->getType()->isSingleValueType() || A->user_empty())
@@ -420,18 +620,6 @@ private:
return false;
}
- // Determine if it would be profitable to create a specialization of the
- // function where the argument takes on the given constant value. If so,
- // add the constant to Constants.
- auto FnSpecCost = getSpecializationCost(F);
- if (!FnSpecCost.isValid()) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialisation cost.\n");
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "FnSpecialization: func specialisation cost: ";
- FnSpecCost.print(dbgs()); dbgs() << "\n");
-
for (auto *C : PossibleConstants) {
LLVM_DEBUG(dbgs() << "FnSpecialization: Constant: " << *C << "\n");
if (ForceFunctionSpecialization) {
@@ -475,6 +663,12 @@ private:
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
continue;
auto &CS = *cast<CallBase>(U);
+      // If the call site has the minsize attribute set, it won't be
+      // specialized.
+ if (CS.hasFnAttr(Attribute::MinSize)) {
+ AllConstant = false;
+ continue;
+ }
// If the parent of the call site will never be executed, we don't need
// to worry about the passed value.
@@ -482,11 +676,25 @@ private:
continue;
auto *V = CS.getArgOperand(A->getArgNo());
+ if (isa<PoisonValue>(V))
+ return false;
+
+      // For now, constant expressions are fine, but only if their operand is
+      // a function (e.g. a bitcast of a function).
+ if (auto *CE = dyn_cast<ConstantExpr>(V))
+ if (!isa<Function>(CE->getOperand(0)))
+ return false;
+
// TrackValueOfGlobalVariable only tracks scalar global variables.
if (auto *GV = dyn_cast<GlobalVariable>(V)) {
- if (!GV->getValueType()->isSingleValueType()) {
+ // Check if we want to specialize on the address of non-constant
+ // global values.
+ if (!GV->isConstant())
+ if (!SpecializeOnAddresses)
+ return false;
+
+ if (!GV->getValueType()->isSingleValueType())
return false;
- }
}
if (isa<Constant>(V) && (Solver.getLatticeValueFor(V).isConstant() ||
@@ -506,6 +714,9 @@ private:
/// This function modifies calls to function \p F whose argument at index \p
/// ArgNo is equal to constant \p C. The calls are rewritten to call function
/// \p Clone instead.
+ ///
+ /// Callsites that have been marked with the MinSize function attribute won't
+ /// be specialized and rewritten.
void rewriteCallSites(Function *F, Function *Clone, Argument &Arg,
Constant *C) {
unsigned ArgNo = Arg.getArgNo();
@@ -527,24 +738,7 @@ private:
}
}
};
-
-/// Function to clean up the left over intrinsics from SCCP util.
-static void cleanup(Module &M) {
- for (Function &F : M) {
- for (BasicBlock &BB : F) {
- for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E;) {
- Instruction *Inst = &*BI++;
- if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
- if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
- Value *Op = II->getOperand(0);
- Inst->replaceAllUsesWith(Op);
- Inst->eraseFromParent();
- }
- }
- }
- }
- }
-}
+} // namespace
bool llvm::runFunctionSpecialization(
Module &M, const DataLayout &DL,
@@ -597,12 +791,27 @@ bool llvm::runFunctionSpecialization(
Solver.trackValueOfGlobalVariable(&G);
}
+ auto &TrackedFuncs = Solver.getArgumentTrackedFunctions();
+ SmallVector<Function *, 16> FuncDecls(TrackedFuncs.begin(),
+ TrackedFuncs.end());
+
+  // No tracked functions, so nothing to do: don't run the solver; just
+  // remove the ssa_copy intrinsics that may have been introduced.
+ if (TrackedFuncs.empty()) {
+ removeSSACopy(M);
+ return false;
+ }
+
// Solve for constants.
auto RunSCCPSolver = [&](auto &WorkList) {
bool ResolvedUndefs = true;
while (ResolvedUndefs) {
+      // That the solver is not run unnecessarily is checked in the regression
+      // test nothing-to-do.ll, so if this debug message is changed, that
+      // regression test needs updating too.
LLVM_DEBUG(dbgs() << "FnSpecialization: Running solver\n");
+
Solver.solve();
LLVM_DEBUG(dbgs() << "FnSpecialization: Resolving undefs\n");
ResolvedUndefs = false;
@@ -615,15 +824,14 @@ bool llvm::runFunctionSpecialization(
for (BasicBlock &BB : *F) {
if (!Solver.isBlockExecutable(&BB))
continue;
+ // FIXME: The solver may make changes to the function here, so set
+ // Changed, even if later function specialization does not trigger.
for (auto &I : make_early_inc_range(BB))
- FS.tryToReplaceWithConstant(&I);
+ Changed |= FS.tryToReplaceWithConstant(&I);
}
}
};
- auto &TrackedFuncs = Solver.getArgumentTrackedFunctions();
- SmallVector<Function *, 16> FuncDecls(TrackedFuncs.begin(),
- TrackedFuncs.end());
#ifndef NDEBUG
LLVM_DEBUG(dbgs() << "FnSpecialization: Worklist fn decls:\n");
for (auto *F : FuncDecls)
@@ -637,14 +845,18 @@ bool llvm::runFunctionSpecialization(
unsigned I = 0;
while (FuncSpecializationMaxIters != I++ &&
FS.specializeFunctions(FuncDecls, CurrentSpecializations)) {
- // TODO: run the solver here for the specialized functions only if we want
- // to specialize recursively.
+
+ // Run the solver for the specialized functions.
+ RunSCCPSolver(CurrentSpecializations);
+
+ // Replace some unresolved constant arguments.
+ constantArgPropagation(FuncDecls, M, Solver);
CurrentSpecializations.clear();
Changed = true;
}
// Clean up the IR by removing ssa_copy intrinsics.
- cleanup(M);
+ removeSSACopy(M);
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index fb4cb23b837e..5e5d2086adc2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -88,7 +88,7 @@ ModulePass *llvm::createGlobalDCEPass() {
static bool isEmptyFunction(Function *F) {
BasicBlock &Entry = F->getEntryBlock();
for (auto &I : Entry) {
- if (isa<DbgInfoIntrinsic>(I))
+ if (I.isDebugOrPseudoInst())
continue;
if (auto *RI = dyn_cast<ReturnInst>(&I))
return !RI->getReturnValue();
@@ -210,7 +210,7 @@ void GlobalDCEPass::ScanVTableLoad(Function *Caller, Metadata *TypeId,
Constant *Ptr =
getPointerAtOffset(VTable->getInitializer(), VTableOffset + CallOffset,
- *Caller->getParent());
+ *Caller->getParent(), VTable);
if (!Ptr) {
LLVM_DEBUG(dbgs() << "can't find pointer in vtable!\n");
VFESafeVTables.erase(VTable);
@@ -416,6 +416,16 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
// virtual function pointers with null, allowing us to remove the
// function itself.
++NumVFuncs;
+
+ // Detect vfuncs that are referenced as "relative pointers" which are used
+ // in Swift vtables, i.e. entries in the form of:
+ //
+  //   i32 trunc (i64 sub (i64 ptrtoint @f, i64 ptrtoint ...) to i32)
+ //
+ // In this case, replace the whole "sub" expression with constant 0 to
+ // avoid leaving a weird sub(0, symbol) expression behind.
+ replaceRelativePointerUsersWithZero(F);
+
F->replaceNonMetadataUsesWith(ConstantPointerNull::get(F->getType()));
}
EraseUnusedGlobalValue(F);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 8750eb9ecc4e..b2c2efed7db8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -208,9 +208,7 @@ CleanupPointerRootUsers(GlobalVariable *GV,
SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
// Constants can't be pointers to dynamically allocated memory.
- for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end();
- UI != E;) {
- User *U = *UI++;
+ for (User *U : llvm::make_early_inc_range(GV->users())) {
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
Value *V = SI->getValueOperand();
if (isa<Constant>(V)) {
@@ -703,8 +701,9 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
!ICmpInst::isSigned(cast<ICmpInst>(U)->getPredicate()) &&
isa<LoadInst>(U->getOperand(0)) &&
isa<ConstantPointerNull>(U->getOperand(1))) {
- assert(isa<GlobalValue>(
- cast<LoadInst>(U->getOperand(0))->getPointerOperand()) &&
+ assert(isa<GlobalValue>(cast<LoadInst>(U->getOperand(0))
+ ->getPointerOperand()
+ ->stripPointerCasts()) &&
"Should be GlobalVariable");
// This and only this kind of non-signed ICmpInst is to be replaced with
// the comparing of the value of the created global init bool later in
@@ -720,22 +719,55 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
/// Return true if all uses of any loads from GV will trap if the loaded value
/// is null. Note that this also permits comparisons of the loaded value
/// against null, as a special case.
-static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
- for (const User *U : GV->users())
- if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
- SmallPtrSet<const PHINode*, 8> PHIs;
- if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
+static bool allUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(GV);
+ while (!Worklist.empty()) {
+ const Value *P = Worklist.pop_back_val();
+ for (auto *U : P->users()) {
+ if (auto *LI = dyn_cast<LoadInst>(U)) {
+ SmallPtrSet<const PHINode *, 8> PHIs;
+ if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
+ return false;
+ } else if (auto *SI = dyn_cast<StoreInst>(U)) {
+ // Ignore stores to the global.
+ if (SI->getPointerOperand() != P)
+ return false;
+ } else if (auto *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->stripPointerCasts() != GV)
+ return false;
+        // Also check the users of the ConstantExpr.
+ Worklist.push_back(CE);
+ } else {
+ // We don't know or understand this user, bail out.
return false;
- } else if (isa<StoreInst>(U)) {
- // Ignore stores to the global.
- } else {
- // We don't know or understand this user, bail out.
- //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
- return false;
+ }
}
+ }
+
return true;
}
+/// Get all the load and store uses of global variable \p GV.
+static void allUsesOfLoadAndStores(GlobalVariable *GV,
+ SmallVector<Value *, 4> &Uses) {
+ SmallVector<Value *, 4> Worklist;
+ Worklist.push_back(GV);
+ while (!Worklist.empty()) {
+ auto *P = Worklist.pop_back_val();
+ for (auto *U : P->users()) {
+ if (auto *CE = dyn_cast<ConstantExpr>(U)) {
+ Worklist.push_back(CE);
+ continue;
+ }
+
+ assert((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
+ "Expect only load or store instructions");
+ Uses.push_back(U);
+ }
+ }
+}
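+
+// For illustration (hypothetical IR): given
+//
+//   @g = internal global i8* null
+//   %l = load i32*, i32** bitcast (i8** @g to i32**)
+//
+// the traversal above pushes the bitcast constant expression onto the
+// worklist, so the load through it is still collected as a use of @g.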
+
static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
bool Changed = false;
for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
@@ -817,8 +849,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(
bool AllNonStoreUsesGone = true;
// Replace all uses of loads with uses of uses of the stored value.
- for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end(); GUI != E;){
- User *GlobalUser = *GUI++;
+ for (User *GlobalUser : llvm::make_early_inc_range(GV->users())) {
if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
// If we were able to delete all uses of the loads
@@ -934,9 +965,8 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
}
}
- Constant *RepValue = NewGV;
- if (NewGV->getType() != GV->getValueType())
- RepValue = ConstantExpr::getBitCast(RepValue, GV->getValueType());
+ SmallPtrSet<Constant *, 1> RepValues;
+ RepValues.insert(NewGV);
// If there is a comparison against null, we will insert a global bool to
// keep track of whether the global was initialized yet or not.
@@ -947,9 +977,11 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
GV->getName()+".init", GV->getThreadLocalMode());
bool InitBoolUsed = false;
- // Loop over all uses of GV, processing them in turn.
- while (!GV->use_empty()) {
- if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
+ // Loop over all instruction uses of GV, processing them in turn.
+ SmallVector<Value *, 4> Guses;
+ allUsesOfLoadAndStores(GV, Guses);
+ for (auto *U : Guses) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// The global is initialized when the store to it occurs. If the stored
// value is null value, the global bool is set to false, otherwise true.
new StoreInst(ConstantInt::getBool(
@@ -961,12 +993,14 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
continue;
}
- LoadInst *LI = cast<LoadInst>(GV->user_back());
+ LoadInst *LI = cast<LoadInst>(U);
while (!LI->use_empty()) {
Use &LoadUse = *LI->use_begin();
ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
if (!ICI) {
- LoadUse = RepValue;
+ auto *CE = ConstantExpr::getBitCast(NewGV, LI->getType());
+ RepValues.insert(CE);
+ LoadUse.set(CE);
continue;
}
@@ -1012,40 +1046,53 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
// To further other optimizations, loop over all users of NewGV and try to
// constant prop them. This will promote GEP instructions with constant
// indices into GEP constant-exprs, which will allow global-opt to hack on it.
- ConstantPropUsersOf(NewGV, DL, TLI);
- if (RepValue != NewGV)
- ConstantPropUsersOf(RepValue, DL, TLI);
+ for (auto *CE : RepValues)
+ ConstantPropUsersOf(CE, DL, TLI);
return NewGV;
}
-/// Scan the use-list of V checking to make sure that there are no complex uses
-/// of V. We permit simple things like dereferencing the pointer, but not
+/// Scan the use-list of the allocation \p CI, checking that there are no
+/// complex uses of it. We permit simple things like dereferencing the pointer,
+/// but not
/// storing through the address, unless it is to the specified global.
static bool
-valueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
+valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI,
const GlobalVariable *GV) {
- for (const User *U : V->users()) {
- const Instruction *Inst = cast<Instruction>(U);
+ SmallPtrSet<const Value *, 4> Visited;
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(CI);
- if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
- continue; // Fine, ignore.
- }
+ while (!Worklist.empty()) {
+ const Value *V = Worklist.pop_back_val();
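+    // Visited.insert(V).second is false when V was already seen, so a value
+    // reachable through several bitcast/GEP chains is processed only once.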
+ if (!Visited.insert(V).second)
+ continue;
- if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
- return false; // Storing the pointer itself... bad.
- continue; // Otherwise, storing through it, or storing into GV... fine.
- }
+ for (const Use &VUse : V->uses()) {
+ const User *U = VUse.getUser();
+ if (isa<LoadInst>(U) || isa<CmpInst>(U))
+ continue; // Fine, ignore.
- if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
- if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV))
- return false;
- continue;
- }
+ if (auto *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getValueOperand() == V &&
+ SI->getPointerOperand()->stripPointerCasts() != GV)
+ return false; // Storing the pointer not into GV... bad.
+ continue; // Otherwise, storing through it, or storing into GV... fine.
+ }
- return false;
+ if (auto *BCI = dyn_cast<BitCastInst>(U)) {
+ Worklist.push_back(BCI);
+ continue;
+ }
+
+ if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ Worklist.push_back(GEPI);
+ continue;
+ }
+
+ return false;
+ }
}
+
return true;
}
@@ -1066,12 +1113,12 @@ static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
// been reached). To do this, we check to see if all uses of the global
// would trap if the global were null: this proves that they must all
// happen after the malloc.
- if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
+ if (!allUsesOfLoadedValueWillTrapIfNull(GV))
return false;
// We can't optimize this if the malloc itself is used in a complex way,
// for example, being stored into multiple globals. This allows the
- // malloc to be stored into the specified global, loaded icmp'd.
+  // malloc to be stored into the specified global, loaded, gep'd, or icmp'd.
// These are all things we could transform to using the global for.
if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV))
return false;
@@ -1112,6 +1159,7 @@ optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
// value was null.
if (GV->getInitializer()->getType()->isPointerTy() &&
GV->getInitializer()->isNullValue() &&
+ StoredOnceVal->getType()->isPointerTy() &&
!NullPointerIsDefined(
nullptr /* F */,
GV->getInitializer()->getType()->getPointerAddressSpace())) {
@@ -1442,8 +1490,7 @@ static void makeAllConstantUsesInstructions(Constant *C) {
append_range(UUsers, U->users());
for (auto *UU : UUsers) {
Instruction *UI = cast<Instruction>(UU);
- Instruction *NewU = U->getAsInstruction();
- NewU->insertBefore(UI);
+ Instruction *NewU = U->getAsInstruction(UI);
UI->replaceUsesOfWith(U, NewU);
}
// We've replaced all the uses, so destroy the constant. (destroyConstant
@@ -1456,6 +1503,7 @@ static void makeAllConstantUsesInstructions(Constant *C) {
/// it if possible. If we make a change, return true.
static bool
processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
auto &DL = GV->getParent()->getDataLayout();
@@ -1554,43 +1602,57 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
if (SRAGlobal(GV, DL))
return true;
}
- if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) {
+ Value *StoredOnceValue = GS.getStoredOnceValue();
+ if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) {
+ // Avoid speculating constant expressions that might trap (div/rem).
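+    // For instance (hypothetical), a stored-once value such as
+    // sdiv (i32 1, i32 0) could trap if it were evaluated on a path the
+    // original store never reaches.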
+ auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue);
+ if (SOVConstant && SOVConstant->canTrap())
+ return Changed;
+
+ Function &StoreFn =
+ const_cast<Function &>(*GS.StoredOnceStore->getFunction());
+ bool CanHaveNonUndefGlobalInitializer =
+ GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace(
+ GV->getType()->getAddressSpace());
// If the initial value for the global was an undef value, and if only
// one other value was stored into it, we can just change the
// initializer to be the stored value, then delete all stores to the
// global. This allows us to mark it constant.
- if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
- if (isa<UndefValue>(GV->getInitializer())) {
- // Change the initial value here.
- GV->setInitializer(SOVConstant);
-
- // Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
-
- if (GV->use_empty()) {
- LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "
- << "simplify all users and delete global!\n");
- GV->eraseFromParent();
- ++NumDeleted;
- }
- ++NumSubstitute;
- return true;
+ // This is restricted to address spaces that allow globals to have
+ // initializers. NVPTX, for example, does not support initializers for
+ // shared memory (AS 3).
+ if (SOVConstant && SOVConstant->getType() == GV->getValueType() &&
+ isa<UndefValue>(GV->getInitializer()) &&
+ CanHaveNonUndefGlobalInitializer) {
+ // Change the initial value here.
+ GV->setInitializer(SOVConstant);
+
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
+
+ if (GV->use_empty()) {
+ LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "
+ << "simplify all users and delete global!\n");
+ GV->eraseFromParent();
+ ++NumDeleted;
}
+ ++NumSubstitute;
+ return true;
+ }
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
- if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL,
- GetTLI))
+ if (optimizeOnceStoredGlobal(GV, StoredOnceValue, GS.Ordering, DL, GetTLI))
return true;
// Otherwise, if the global was not a boolean, we can shrink it to be a
- // boolean.
- if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) {
- if (GS.Ordering == AtomicOrdering::NotAtomic) {
- if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
- ++NumShrunkToBool;
- return true;
- }
+    // boolean. Skip this for address spaces that don't allow an initializer.
+ if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic &&
+ (!isa<UndefValue>(GV->getInitializer()) ||
+ CanHaveNonUndefGlobalInitializer)) {
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+ ++NumShrunkToBool;
+ return true;
}
}
}
@@ -1602,6 +1664,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
/// make a change, return true.
static bool
processGlobal(GlobalValue &GV,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
if (GV.getName().startswith("llvm."))
@@ -1634,7 +1697,8 @@ processGlobal(GlobalValue &GV,
if (GVar->isConstant() || !GVar->hasInitializer())
return Changed;
- return processInternalGlobal(GVar, GS, GetTLI, LookupDomTree) || Changed;
+ return processInternalGlobal(GVar, GS, GetTTI, GetTLI, LookupDomTree) ||
+ Changed;
}
/// Walk all of the direct calls of the specified function, changing them to
@@ -1651,7 +1715,7 @@ static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
Attribute::AttrKind A) {
unsigned AttrIndex;
if (Attrs.hasAttrSomewhere(A, &AttrIndex))
- return Attrs.removeAttribute(C, AttrIndex, A);
+ return Attrs.removeAttributeAtIndex(C, AttrIndex, A);
return Attrs;
}
@@ -1864,10 +1928,8 @@ static void RemovePreallocated(Function *F) {
Value *AllocaReplacement = ArgAllocas[AllocArgIndex];
if (!AllocaReplacement) {
auto AddressSpace = UseCall->getType()->getPointerAddressSpace();
- auto *ArgType = UseCall
- ->getAttribute(AttributeList::FunctionIndex,
- Attribute::Preallocated)
- .getValueAsType();
+ auto *ArgType =
+ UseCall->getFnAttr(Attribute::Preallocated).getValueAsType();
auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction();
Builder.SetInsertPoint(InsertBefore);
auto *Alloca =
@@ -1897,26 +1959,22 @@ OptimizeFunctions(Module &M,
bool Changed = false;
std::vector<Function *> AllCallsCold;
- for (Module::iterator FI = M.begin(), E = M.end(); FI != E;) {
- Function *F = &*FI++;
- if (hasOnlyColdCalls(*F, GetBFI))
- AllCallsCold.push_back(F);
- }
+ for (Function &F : llvm::make_early_inc_range(M))
+ if (hasOnlyColdCalls(F, GetBFI))
+ AllCallsCold.push_back(&F);
// Optimize functions.
- for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
- Function *F = &*FI++;
-
+ for (Function &F : llvm::make_early_inc_range(M)) {
// Don't perform global opt pass on naked functions; we don't want fast
// calling conventions for naked functions.
- if (F->hasFnAttribute(Attribute::Naked))
+ if (F.hasFnAttribute(Attribute::Naked))
continue;
// Functions without names cannot be referenced outside this module.
- if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
- F->setLinkage(GlobalValue::InternalLinkage);
+ if (!F.hasName() && !F.isDeclaration() && !F.hasLocalLinkage())
+ F.setLinkage(GlobalValue::InternalLinkage);
- if (deleteIfDead(*F, NotDiscardableComdats)) {
+ if (deleteIfDead(F, NotDiscardableComdats)) {
Changed = true;
continue;
}
@@ -1931,17 +1989,17 @@ OptimizeFunctions(Module &M,
// some more complicated logic to break these cycles.
// Removing unreachable blocks might invalidate the dominator so we
// recalculate it.
- if (!F->isDeclaration()) {
- if (removeUnreachableBlocks(*F)) {
- auto &DT = LookupDomTree(*F);
- DT.recalculate(*F);
+ if (!F.isDeclaration()) {
+ if (removeUnreachableBlocks(F)) {
+ auto &DT = LookupDomTree(F);
+ DT.recalculate(F);
Changed = true;
}
}
- Changed |= processGlobal(*F, GetTLI, LookupDomTree);
+ Changed |= processGlobal(F, GetTTI, GetTLI, LookupDomTree);
- if (!F->hasLocalLinkage())
+ if (!F.hasLocalLinkage())
continue;
// If we have an inalloca parameter that we can safely remove the
@@ -1949,56 +2007,55 @@ OptimizeFunctions(Module &M,
// wouldn't be safe in the presence of inalloca.
// FIXME: We should also hoist alloca affected by this to the entry
// block if possible.
- if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
- !F->hasAddressTaken() && !hasMustTailCallers(F)) {
- RemoveAttribute(F, Attribute::InAlloca);
+ if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
+ !F.hasAddressTaken() && !hasMustTailCallers(&F)) {
+ RemoveAttribute(&F, Attribute::InAlloca);
Changed = true;
}
// FIXME: handle invokes
// FIXME: handle musttail
- if (F->getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
- if (!F->hasAddressTaken() && !hasMustTailCallers(F) &&
- !hasInvokeCallers(F)) {
- RemovePreallocated(F);
+ if (F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
+ if (!F.hasAddressTaken() && !hasMustTailCallers(&F) &&
+ !hasInvokeCallers(&F)) {
+ RemovePreallocated(&F);
Changed = true;
}
continue;
}
- if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
+ if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
NumInternalFunc++;
- TargetTransformInfo &TTI = GetTTI(*F);
+ TargetTransformInfo &TTI = GetTTI(F);
// Change the calling convention to coldcc if either stress testing is
// enabled or the target would like to use coldcc on functions which are
// cold at all call sites and the callers contain no other non coldcc
// calls.
if (EnableColdCCStressTest ||
- (TTI.useColdCCForColdCall(*F) &&
- isValidCandidateForColdCC(*F, GetBFI, AllCallsCold))) {
- F->setCallingConv(CallingConv::Cold);
- changeCallSitesToColdCC(F);
+ (TTI.useColdCCForColdCall(F) &&
+ isValidCandidateForColdCC(F, GetBFI, AllCallsCold))) {
+ F.setCallingConv(CallingConv::Cold);
+ changeCallSitesToColdCC(&F);
Changed = true;
NumColdCC++;
}
}
- if (hasChangeableCC(F) && !F->isVarArg() &&
- !F->hasAddressTaken()) {
+ if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
// If this function has a calling convention worth changing, is not a
// varargs function, and is only called directly, promote it to use the
// Fast calling convention.
- F->setCallingConv(CallingConv::Fast);
- ChangeCalleesToFastCall(F);
+ F.setCallingConv(CallingConv::Fast);
+ ChangeCalleesToFastCall(&F);
++NumFastCallFns;
Changed = true;
}
- if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
- !F->hasAddressTaken()) {
+ if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) &&
+ !F.hasAddressTaken()) {
// The function is not used by a trampoline intrinsic, so it is safe
// to remove the 'nest' attribute.
- RemoveAttribute(F, Attribute::Nest);
+ RemoveAttribute(&F, Attribute::Nest);
++NumNestRemoved;
Changed = true;
}
@@ -2008,35 +2065,34 @@ OptimizeFunctions(Module &M,
static bool
OptimizeGlobalVars(Module &M,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
bool Changed = false;
- for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
- GVI != E; ) {
- GlobalVariable *GV = &*GVI++;
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
// Global variables without names cannot be referenced outside this module.
- if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage())
- GV->setLinkage(GlobalValue::InternalLinkage);
+ if (!GV.hasName() && !GV.isDeclaration() && !GV.hasLocalLinkage())
+ GV.setLinkage(GlobalValue::InternalLinkage);
// Simplify the initializer.
- if (GV->hasInitializer())
- if (auto *C = dyn_cast<Constant>(GV->getInitializer())) {
+ if (GV.hasInitializer())
+ if (auto *C = dyn_cast<Constant>(GV.getInitializer())) {
auto &DL = M.getDataLayout();
// TLI is not used in the case of a Constant, so use default nullptr
// for that optional parameter, since we don't have a Function to
// provide GetTLI anyway.
Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr);
if (New != C)
- GV->setInitializer(New);
+ GV.setInitializer(New);
}
- if (deleteIfDead(*GV, NotDiscardableComdats)) {
+ if (deleteIfDead(GV, NotDiscardableComdats)) {
Changed = true;
continue;
}
- Changed |= processGlobal(*GV, GetTLI, LookupDomTree);
+ Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree);
}
return Changed;
}
@@ -2425,24 +2481,21 @@ OptimizeGlobalAliases(Module &M,
for (GlobalValue *GV : Used.used())
Used.compilerUsedErase(GV);
- for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E;) {
- GlobalAlias *J = &*I++;
-
+ for (GlobalAlias &J : llvm::make_early_inc_range(M.aliases())) {
// Aliases without names cannot be referenced outside this module.
- if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage())
- J->setLinkage(GlobalValue::InternalLinkage);
+ if (!J.hasName() && !J.isDeclaration() && !J.hasLocalLinkage())
+ J.setLinkage(GlobalValue::InternalLinkage);
- if (deleteIfDead(*J, NotDiscardableComdats)) {
+ if (deleteIfDead(J, NotDiscardableComdats)) {
Changed = true;
continue;
}
// If the alias can change at link time, nothing can be done - bail out.
- if (J->isInterposable())
+ if (J.isInterposable())
continue;
- Constant *Aliasee = J->getAliasee();
+ Constant *Aliasee = J.getAliasee();
GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
// We can't trivially replace the alias with the aliasee if the aliasee is
// non-trivial in some way. We also can't replace the alias with the aliasee
@@ -2455,31 +2508,31 @@ OptimizeGlobalAliases(Module &M,
// Make all users of the alias use the aliasee instead.
bool RenameTarget;
- if (!hasUsesToReplace(*J, Used, RenameTarget))
+ if (!hasUsesToReplace(J, Used, RenameTarget))
continue;
- J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType()));
+ J.replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J.getType()));
++NumAliasesResolved;
Changed = true;
if (RenameTarget) {
// Give the aliasee the name, linkage and other attributes of the alias.
- Target->takeName(&*J);
- Target->setLinkage(J->getLinkage());
- Target->setDSOLocal(J->isDSOLocal());
- Target->setVisibility(J->getVisibility());
- Target->setDLLStorageClass(J->getDLLStorageClass());
+ Target->takeName(&J);
+ Target->setLinkage(J.getLinkage());
+ Target->setDSOLocal(J.isDSOLocal());
+ Target->setVisibility(J.getVisibility());
+ Target->setDLLStorageClass(J.getDLLStorageClass());
- if (Used.usedErase(&*J))
+ if (Used.usedErase(&J))
Used.usedInsert(Target);
- if (Used.compilerUsedErase(&*J))
+ if (Used.compilerUsedErase(&J))
Used.compilerUsedInsert(Target);
- } else if (mayHaveOtherReferences(*J, Used))
+ } else if (mayHaveOtherReferences(J, Used))
continue;
// Delete the alias.
- M.getAliasList().erase(J);
+ M.getAliasList().erase(&J);
++NumAliasesRemoved;
Changed = true;
}
@@ -2526,7 +2579,7 @@ static bool cxxDtorIsEmpty(const Function &Fn) {
return false;
for (auto &I : Fn.getEntryBlock()) {
- if (isa<DbgInfoIntrinsic>(I))
+ if (I.isDebugOrPseudoInst())
continue;
if (isa<ReturnInst>(I))
return true;
@@ -2552,12 +2605,11 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
// and remove them.
bool Changed = false;
- for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end();
- I != E;) {
+ for (User *U : llvm::make_early_inc_range(CXAAtExitFn->users())) {
// We're only interested in calls. Theoretically, we could handle invoke
// instructions as well, but neither llvm-gcc nor clang generate invokes
// to __cxa_atexit.
- CallInst *CI = dyn_cast<CallInst>(*I++);
+ CallInst *CI = dyn_cast<CallInst>(U);
if (!CI)
continue;
@@ -2614,8 +2666,8 @@ static bool optimizeGlobalsInModule(
});
// Optimize non-address-taken globals.
- LocalChange |=
- OptimizeGlobalVars(M, GetTLI, LookupDomTree, NotDiscardableComdats);
+ LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree,
+ NotDiscardableComdats);
// Resolve aliases, when possible.
LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalSplit.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalSplit.cpp
index 365b269dc3bf..e7d698c42fcf 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalSplit.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalSplit.cpp
@@ -154,11 +154,8 @@ static bool splitGlobals(Module &M) {
return false;
bool Changed = false;
- for (auto I = M.global_begin(); I != M.global_end();) {
- GlobalVariable &GV = *I;
- ++I;
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals()))
Changed |= splitGlobal(GV);
- }
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp
index adf9ffba5780..b8a314c54f18 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
@@ -33,6 +34,10 @@
using namespace llvm;
using namespace IRSimilarity;
+// A command line flag, used for debugging, to exclude branches from
+// similarity matching and outlining.
+extern cl::opt<bool> DisableBranches;
+
// Set to true if the user wants the ir outliner to run on linkonceodr linkage
// functions. This is false by default because the linker can dedupe linkonceodr
// functions. Since the outliner is confined to a single module (modulo LTO),
@@ -71,8 +76,12 @@ struct OutlinableGroup {
/// for extraction.
bool IgnoreGroup = false;
- /// The return block for the overall function.
- BasicBlock *EndBB = nullptr;
+ /// The return blocks for the overall function.
+ DenseMap<Value *, BasicBlock *> EndBBs;
+
+  /// The PHI blocks, keyed by the return value of their corresponding
+  /// return block.
+ DenseMap<Value *, BasicBlock *> PHIBlocks;
/// A set containing the different GVN store sets needed. Each array contains
/// a sorted list of the different values that need to be stored into output
@@ -87,6 +96,14 @@ struct OutlinableGroup {
/// index in ArgumentTypes is an output argument.
unsigned NumAggregateInputs = 0;
+ /// The mapping of the canonical numbering of the values in outlined sections
+ /// to specific arguments.
+ DenseMap<unsigned, unsigned> CanonicalNumberToAggArg;
+
+ /// The number of branches in the region target a basic block that is outside
+ /// of the region.
+ unsigned BranchesToOutside = 0;
+
/// The number of instructions that will be outlined by extracting \ref
/// Regions.
InstructionCost Benefit = 0;
@@ -118,20 +135,67 @@ struct OutlinableGroup {
/// \param SourceBB - the BasicBlock to pull Instructions from.
/// \param TargetBB - the BasicBlock to put Instruction into.
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
- BasicBlock::iterator BBCurr, BBEnd, BBNext;
- for (BBCurr = SourceBB.begin(), BBEnd = SourceBB.end(); BBCurr != BBEnd;
- BBCurr = BBNext) {
- BBNext = std::next(BBCurr);
- BBCurr->moveBefore(TargetBB, TargetBB.end());
- }
+ for (Instruction &I : llvm::make_early_inc_range(SourceBB))
+ I.moveBefore(TargetBB, TargetBB.end());
+}
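+
+// Note that make_early_inc_range keeps the loop above valid: the iterator is
+// advanced before the current instruction is unlinked from SourceBB. The same
+// idiom applies to deletion, e.g. (hypothetical sketch):
+//
+//   for (Instruction &I : llvm::make_early_inc_range(BB))
+//     if (isInstructionTriviallyDead(&I))
+//       I.eraseFromParent();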
+
+/// A function to sort the keys of \p Map, which must be a mapping of constant
+/// values to basic blocks, and return them in \p SortedKeys.
+///
+/// \param SortedKeys - The vector the sorted keys will be returned in.
+/// \param Map - The DenseMap containing keys to sort.
+static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
+ DenseMap<Value *, BasicBlock *> &Map) {
+ for (auto &VtoBB : Map)
+ SortedKeys.push_back(VtoBB.first);
+
+ stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
+ const ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS);
+ const ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
+ assert(RHSC && "Not a constant integer in return value?");
+ assert(LHSC && "Not a constant integer in return value?");
+
+ return LHSC->getLimitedValue() < RHSC->getLimitedValue();
+ });
+}
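+
+// For example (hypothetical), given Map = {i32 1 -> %bb1, i32 0 -> %bb0,
+// i32 2 -> %bb2}, SortedKeys is returned as [i32 0, i32 1, i32 2].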
+
+Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
+ Value *V) {
+ Optional<unsigned> GVN = Candidate->getGVN(V);
+ assert(GVN.hasValue() && "No GVN for incoming value");
+ Optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
+ Optional<unsigned> FirstGVN = Other.Candidate->fromCanonicalNum(*CanonNum);
+ Optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
+ return FoundValueOpt.getValueOr(nullptr);
}
void OutlinableRegion::splitCandidate() {
assert(!CandidateSplit && "Candidate already split!");
+ Instruction *BackInst = Candidate->backInstruction();
+
+ Instruction *EndInst = nullptr;
+  // Check whether the last instruction is a terminator; if it is, we do
+  // not split on the following instruction and leave the block as it is. We
+  // also check that this is not the last instruction in the function, otherwise
+ // the check for whether the current following instruction matches the
+ // previously recorded instruction will be incorrect.
+ if (!BackInst->isTerminator() ||
+ BackInst->getParent() != &BackInst->getFunction()->back()) {
+ EndInst = Candidate->end()->Inst;
+ assert(EndInst && "Expected an end instruction?");
+ }
+
+ // We check if the current instruction following the last instruction in the
+ // region is the same as the recorded instruction following the last
+ // instruction. If they do not match, there could be problems in rewriting
+ // the program after outlining, so we ignore it.
+ if (!BackInst->isTerminator() &&
+ EndInst != BackInst->getNextNonDebugInstruction())
+ return;
+
Instruction *StartInst = (*Candidate->begin()).Inst;
- Instruction *EndInst = (*Candidate->end()).Inst;
- assert(StartInst && EndInst && "Expected a start and end instruction?");
+ assert(StartInst && "Expected a start instruction?");
StartBB = StartInst->getParent();
PrevBB = StartBB;
@@ -153,13 +217,20 @@ void OutlinableRegion::splitCandidate() {
std::string OriginalName = PrevBB->getName().str();
StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
-
- // This is the case for the inner block since we do not have to include
- // multiple blocks.
- EndBB = StartBB;
- FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
+ PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB);
CandidateSplit = true;
+ if (!BackInst->isTerminator()) {
+ EndBB = EndInst->getParent();
+ FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
+ EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB);
+ FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB);
+ return;
+ }
+
+ EndBB = BackInst->getParent();
+ EndsInBranch = true;
+ FollowBB = nullptr;
}
void OutlinableRegion::reattachCandidate() {
@@ -180,7 +251,6 @@ void OutlinableRegion::reattachCandidate() {
// inst3
// inst4
assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
- assert(FollowBB != nullptr && "StartBB for Candidate is not defined!");
// StartBB should only have one predecessor since we put an unconditional
// branch at the end of PrevBB when we split the BasicBlock.
@@ -189,21 +259,24 @@ void OutlinableRegion::reattachCandidate() {
"No Predecessor for the region start basic block!");
assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
- assert(EndBB->getTerminator() && "Terminator removed from EndBB!");
PrevBB->getTerminator()->eraseFromParent();
- EndBB->getTerminator()->eraseFromParent();
moveBBContents(*StartBB, *PrevBB);
BasicBlock *PlacementBB = PrevBB;
if (StartBB != EndBB)
PlacementBB = EndBB;
- moveBBContents(*FollowBB, *PlacementBB);
+ if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
+ assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
+ assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
+ PlacementBB->getTerminator()->eraseFromParent();
+ moveBBContents(*FollowBB, *PlacementBB);
+ PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
+ FollowBB->eraseFromParent();
+ }
PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
- PrevBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
StartBB->eraseFromParent();
- FollowBB->eraseFromParent();
// Make sure to save changes back to the StartBB.
StartBB = PrevBB;
@@ -261,8 +334,9 @@ InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
// division instruction for targets that have a native division instruction.
// To be overly conservative, we only add 1 to the number of instructions for
// each division instruction.
- for (Instruction &I : *StartBB) {
- switch (I.getOpcode()) {
+ for (IRInstructionData &ID : *Candidate) {
+ Instruction *I = ID.Inst;
+ switch (I->getOpcode()) {
case Instruction::FDiv:
case Instruction::FRem:
case Instruction::SDiv:
@@ -272,7 +346,7 @@ InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
Benefit += 1;
break;
default:
- Benefit += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+ Benefit += TTI.getInstructionCost(I, TargetTransformInfo::TCK_CodeSize);
break;
}
}
@@ -373,8 +447,24 @@ Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
unsigned FunctionNameSuffix) {
assert(!Group.OutlinedFunction && "Function is already defined!");
+ Type *RetTy = Type::getVoidTy(M.getContext());
+ // All extracted functions _should_ have the same return type at this point
+ // since the similarity identifier ensures that all branches outside of the
+ // region occur in the same place.
+
+ // NOTE: Should we ever move to the model that uses a switch at every point
+ // needed, meaning that we could branch within the region or out, it is
+ // possible that we will need to switch to using the most general case all of
+ // the time.
+ for (OutlinableRegion *R : Group.Regions) {
+ Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
+ if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
+ (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16)))
+ RetTy = ExtractedFuncType;
+ }
+
Group.OutlinedFunctionType = FunctionType::get(
- Type::getVoidTy(M.getContext()), Group.ArgumentTypes, false);
+ RetTy, Group.ArgumentTypes, false);
// These functions will only be called from within the same module, so
// we can set an internal linkage.
@@ -430,21 +520,23 @@ Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
///
/// \param [in] Old - The function to move the basic blocks from.
/// \param [in] New - The function to move the basic blocks to.
-/// \returns the first return block for the function in New.
-static BasicBlock *moveFunctionData(Function &Old, Function &New) {
- Function::iterator CurrBB, NextBB, FinalBB;
- BasicBlock *NewEnd = nullptr;
- std::vector<Instruction *> DebugInsts;
- for (CurrBB = Old.begin(), FinalBB = Old.end(); CurrBB != FinalBB;
- CurrBB = NextBB) {
- NextBB = std::next(CurrBB);
- CurrBB->removeFromParent();
- CurrBB->insertInto(&New);
- Instruction *I = CurrBB->getTerminator();
- if (isa<ReturnInst>(I))
- NewEnd = &(*CurrBB);
-
- for (Instruction &Val : *CurrBB) {
+/// \param [out] NewEnds - The return blocks of the new overall function.
+static void moveFunctionData(Function &Old, Function &New,
+ DenseMap<Value *, BasicBlock *> &NewEnds) {
+ for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) {
+ CurrBB.removeFromParent();
+ CurrBB.insertInto(&New);
+ Instruction *I = CurrBB.getTerminator();
+
+    // Each block in which we find a return instruction is a potential exit
+    // path for the function. We keep track of each such block based on its
+    // return value here.
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
+ NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB));
+
+ std::vector<Instruction *> DebugInsts;
+
+ for (Instruction &Val : CurrBB) {
// We must handle the scoping of called functions differently than
// other outlined instructions.
if (!isa<CallInst>(&Val)) {
@@ -476,8 +568,7 @@ static BasicBlock *moveFunctionData(Function &Old, Function &New) {
I->eraseFromParent();
}
- assert(NewEnd && "No return instruction for new function?");
- return NewEnd;
+ assert(NewEnds.size() > 0 && "No return instruction for new function?");
}
 /// Find the constants that will need to be lifted into arguments
@@ -664,11 +755,22 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
// function to account for the extracted constants, we have two different
// counters as we find extracted arguments, and as we come across overall
// arguments.
+
+ // Additionally, in our first pass, for the first extracted function,
+ // we find argument locations for the canonical value numbering. This
+ // numbering overrides any discovered location for the extracted code.
for (unsigned InputVal : InputGVNs) {
+ Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
+ assert(CanonicalNumberOpt.hasValue() && "Canonical number not found?");
+ unsigned CanonicalNumber = CanonicalNumberOpt.getValue();
+
Optional<Value *> InputOpt = C.fromGVN(InputVal);
assert(InputOpt.hasValue() && "Global value number not found?");
Value *Input = InputOpt.getValue();
+ DenseMap<unsigned, unsigned>::iterator AggArgIt =
+ Group.CanonicalNumberToAggArg.find(CanonicalNumber);
+
if (!Group.InputTypesSet) {
Group.ArgumentTypes.push_back(Input->getType());
// If the input value has a swifterr attribute, make sure to mark the
@@ -684,17 +786,34 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
// Check if we have a constant. If we do add it to the overall argument
// number to Constant map for the region, and continue to the next input.
if (Constant *CST = dyn_cast<Constant>(Input)) {
- Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
+ if (AggArgIt != Group.CanonicalNumberToAggArg.end())
+ Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST));
+ else {
+ Group.CanonicalNumberToAggArg.insert(
+ std::make_pair(CanonicalNumber, TypeIndex));
+ Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
+ }
TypeIndex++;
continue;
}
// It is not a constant, we create the mapping from extracted argument list
- // to the overall argument list.
+ // to the overall argument list, using the canonical location, if it exists.
assert(ArgInputs.count(Input) && "Input cannot be found!");
- Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
- Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
+ if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
+ if (OriginalIndex != AggArgIt->second)
+ Region.ChangedArgOrder = true;
+ Region.ExtractedArgToAgg.insert(
+ std::make_pair(OriginalIndex, AggArgIt->second));
+ Region.AggArgToExtracted.insert(
+ std::make_pair(AggArgIt->second, OriginalIndex));
+ } else {
+ Group.CanonicalNumberToAggArg.insert(
+ std::make_pair(CanonicalNumber, TypeIndex));
+ Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
+ Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
+ }
OriginalIndex++;
TypeIndex++;
}
@@ -718,10 +837,41 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
/// \param [in] Outputs - The values found by the code extractor.
static void
findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
- ArrayRef<Value *> Outputs) {
+ SetVector<Value *> &Outputs) {
OutlinableGroup &Group = *Region.Parent;
IRSimilarityCandidate &C = *Region.Candidate;
+ SmallVector<BasicBlock *> BE;
+ DenseSet<BasicBlock *> BBSet;
+ C.getBasicBlocks(BBSet, BE);
+
+  // Find the exit blocks of the region.
+ SmallPtrSet<BasicBlock *, 1> Exits;
+ for (BasicBlock *Block : BE)
+ for (BasicBlock *Succ : successors(Block))
+ if (!BBSet.contains(Succ))
+ Exits.insert(Succ);
+
+  // After determining which blocks exit to PHINodes, we check the incoming
+  // values of those PHINodes. If more than one incoming value comes from
+  // inside the region, we cannot outline it, so the region is ignored.
+ for (BasicBlock *ExitBB : Exits) {
+ for (PHINode &PN : ExitBB->phis()) {
+ // Find all incoming values from the outlining region.
+ SmallVector<unsigned, 2> IncomingVals;
+ for (unsigned Idx = 0; Idx < PN.getNumIncomingValues(); ++Idx)
+ if (BBSet.contains(PN.getIncomingBlock(Idx)))
+ IncomingVals.push_back(Idx);
+
+      // Skip this PHI if at most one incoming value comes from the region.
+ if (IncomingVals.size() <= 1)
+ continue;
+
+ Region.IgnoreRegion = true;
+ return;
+ }
+ }
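+
+  // For example (hypothetical): an exit block PHI such as
+  //   %p = phi i32 [ %a, %r1 ], [ %b, %r2 ]
+  // where %r1 and %r2 are both inside the region has two incoming values
+  // from the region, so the region is ignored above.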
+
// This counts the argument number in the extracted function.
unsigned OriginalIndex = Region.NumExtractedInputs;
@@ -797,7 +947,7 @@ void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
// Map the outputs found by the CodeExtractor to the arguments found for
// the overall function.
- findExtractedOutputToOverallOutputMapping(Region, Outputs.getArrayRef());
+ findExtractedOutputToOverallOutputMapping(Region, Outputs);
}
/// Replace the extracted function in the Region with a call to the overall
@@ -820,9 +970,10 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
assert(AggFunc && "Function to replace with is nullptr?");
   // If the arguments are the same size, there are no values that need to be
- // made argument, or different output registers to handle. We can simply
- // replace the called function in this case.
- if (AggFunc->arg_size() == Call->arg_size()) {
+  // made into an argument, the argument ordering has not been changed, and
+  // there are no different output registers to handle. We can simply replace
+  // the called function in this case.
+ if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
<< *AggFunc << " with same number of arguments\n");
Call->setCalledFunction(AggFunc);
@@ -895,6 +1046,9 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
// Transfer any debug information.
Call->setDebugLoc(Region.Call->getDebugLoc());
+ // Since our output may determine which branch we go to, we make sure to
+  // propagate this new call value through the module.
+ OldCall->replaceAllUsesWith(Call);
// Remove the old instruction.
OldCall->eraseFromParent();
@@ -913,13 +1067,23 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
// region with the arguments of the function for an OutlinableGroup.
//
/// \param [in] Region - The region of extracted code to be changed.
-/// \param [in,out] OutputBB - The BasicBlock for the output stores for this
+/// \param [in,out] OutputBBs - The BasicBlocks for the output stores for this
/// region.
-static void replaceArgumentUses(OutlinableRegion &Region,
- BasicBlock *OutputBB) {
+/// \param [in] FirstFunction - A flag to indicate whether we are using this
+/// function to define the overall outlined function for all the regions, or
+/// if we are operating on one of the following regions.
+static void
+replaceArgumentUses(OutlinableRegion &Region,
+ DenseMap<Value *, BasicBlock *> &OutputBBs,
+ bool FirstFunction = false) {
OutlinableGroup &Group = *Region.Parent;
assert(Region.ExtractedFunction && "Region has no extracted function?");
+ Function *DominatingFunction = Region.ExtractedFunction;
+ if (FirstFunction)
+ DominatingFunction = Group.OutlinedFunction;
+ DominatorTree DT(*DominatingFunction);
+
for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
ArgIdx++) {
assert(Region.ExtractedArgToAgg.find(ArgIdx) !=
@@ -946,11 +1110,53 @@ static void replaceArgumentUses(OutlinableRegion &Region,
assert(InstAsUser && "User is nullptr!");
Instruction *I = cast<Instruction>(InstAsUser);
- I->setDebugLoc(DebugLoc());
- LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
- << *OutputBB << "\n");
+ BasicBlock *BB = I->getParent();
+ SmallVector<BasicBlock *, 4> Descendants;
+ DT.getDescendants(BB, Descendants);
+ bool EdgeAdded = false;
+ if (Descendants.size() == 0) {
+ EdgeAdded = true;
+ DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
+ DT.getDescendants(BB, Descendants);
+ }
+
+    // Iterate over the following blocks, looking for return instructions;
+    // if we find one, find the corresponding output block for the return
+    // value and move our store instruction there.
+ for (BasicBlock *DescendBB : Descendants) {
+ ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator());
+ if (!RI)
+ continue;
+ Value *RetVal = RI->getReturnValue();
+ auto VBBIt = OutputBBs.find(RetVal);
+ assert(VBBIt != OutputBBs.end() && "Could not find output value!");
+
+ // If this is storing a PHINode, we must make sure it is included in the
+ // overall function.
+ StoreInst *SI = cast<StoreInst>(I);
+
+ Value *ValueOperand = SI->getValueOperand();
+
+ StoreInst *NewI = cast<StoreInst>(I->clone());
+ NewI->setDebugLoc(DebugLoc());
+ BasicBlock *OutputBB = VBBIt->second;
+ OutputBB->getInstList().push_back(NewI);
+ LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
+ << *OutputBB << "\n");
- I->moveBefore(*OutputBB, OutputBB->end());
+ if (FirstFunction)
+ continue;
+ Value *CorrVal =
+ Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
+ assert(CorrVal && "Value is nullptr?");
+ NewI->setOperand(0, CorrVal);
+ }
+
+ // If we added an edge for basic blocks without a predecessor, we remove it
+ // here.
+ if (EdgeAdded)
+ DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
+ I->eraseFromParent();
LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
<< *Region.ExtractedFunction << " with " << *AggArg
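The loop above relocates each extracted output store: it walks the blocks that
follow the store's block and, for every return it finds, clones the store into
the output block keyed by that return's value. A standalone sketch of that
value-keyed dispatch, using plain std:: containers and simple reachability in
place of LLVM's DominatorTree and DenseMap (all names here are illustrative):

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

struct Block {
  std::string Name;
  std::vector<Block *> Succs;
  const int *RetVal = nullptr; // non-null if the block ends in a return
};

// Collect the return values reachable from BB, depth-first.
static void findReachableReturns(Block *BB, std::set<Block *> &Seen,
                                 std::vector<const int *> &Rets) {
  if (!Seen.insert(BB).second)
    return;
  if (BB->RetVal)
    Rets.push_back(BB->RetVal);
  for (Block *S : BB->Succs)
    findReachableReturns(S, Seen, Rets);
}

int main() {
  int R0 = 0, R1 = 1;
  Block Exit0{"exit0", {}, &R0}, Exit1{"exit1", {}, &R1};
  Block Entry{"entry", {&Exit0, &Exit1}};
  // One output block per distinct return value, as in OutputBBs.
  std::map<const int *, std::string> OutputBBs = {
      {&R0, "output_block_0_0"}, {&R1, "output_block_0_1"}};
  std::set<Block *> Seen;
  std::vector<const int *> Rets;
  findReachableReturns(&Entry, Seen, Rets);
  for (const int *RV : Rets) // clone one store per reachable return
    std::cout << "store goes to " << OutputBBs[RV] << "\n";
  return 0;
}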
@@ -990,69 +1196,53 @@ void replaceConstants(OutlinableRegion &Region) {
}
}
-/// For the given function, find all the nondebug or lifetime instructions,
-/// and return them as a vector. Exclude any blocks in \p ExludeBlocks.
-///
-/// \param [in] F - The function we collect the instructions from.
-/// \param [in] ExcludeBlocks - BasicBlocks to ignore.
-/// \returns the list of instructions extracted.
-static std::vector<Instruction *>
-collectRelevantInstructions(Function &F,
- DenseSet<BasicBlock *> &ExcludeBlocks) {
- std::vector<Instruction *> RelevantInstructions;
-
- for (BasicBlock &BB : F) {
- if (ExcludeBlocks.contains(&BB))
- continue;
-
- for (Instruction &Inst : BB) {
- if (Inst.isLifetimeStartOrEnd())
- continue;
- if (isa<DbgInfoIntrinsic>(Inst))
- continue;
-
- RelevantInstructions.push_back(&Inst);
- }
- }
-
- return RelevantInstructions;
-}
-
/// It is possible that there is a basic block that already performs the same
/// stores. This returns a duplicate block, if it exists
///
-/// \param OutputBB [in] the block we are looking for a duplicate of.
+/// \param OutputBBs [in] the blocks we are looking for a duplicate of.
/// \param OutputStoreBBs [in] The existing output blocks.
/// \returns an optional value with the number of the matching output block,
/// if there is a match.
-Optional<unsigned>
-findDuplicateOutputBlock(BasicBlock *OutputBB,
- ArrayRef<BasicBlock *> OutputStoreBBs) {
+Optional<unsigned> findDuplicateOutputBlock(
+ DenseMap<Value *, BasicBlock *> &OutputBBs,
+ std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
- bool WrongInst = false;
- bool WrongSize = false;
+ bool Mismatch = false;
unsigned MatchingNum = 0;
- for (BasicBlock *CompBB : OutputStoreBBs) {
- WrongInst = false;
- if (CompBB->size() - 1 != OutputBB->size()) {
- WrongSize = true;
- MatchingNum++;
- continue;
- }
-
- WrongSize = false;
- BasicBlock::iterator NIt = OutputBB->begin();
- for (Instruction &I : *CompBB) {
- if (isa<BranchInst>(&I))
- continue;
+ // We compare the new set of output blocks to the other sets of output
+ // blocks. If they contain the same number of blocks, with identical
+ // instructions, they are considered to be the same.
+ for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
+ Mismatch = false;
+ for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
+ DenseMap<Value *, BasicBlock *>::iterator OutputBBIt =
+ OutputBBs.find(VToB.first);
+ if (OutputBBIt == OutputBBs.end()) {
+ Mismatch = true;
+ break;
+ }
- if (!I.isIdenticalTo(&(*NIt))) {
- WrongInst = true;
+ BasicBlock *CompBB = VToB.second;
+ BasicBlock *OutputBB = OutputBBIt->second;
+ if (CompBB->size() - 1 != OutputBB->size()) {
+ Mismatch = true;
break;
}
- NIt++;
+ BasicBlock::iterator NIt = OutputBB->begin();
+ for (Instruction &I : *CompBB) {
+ if (isa<BranchInst>(&I))
+ continue;
+
+ if (!I.isIdenticalTo(&(*NIt))) {
+ Mismatch = true;
+ break;
+ }
+
+ NIt++;
+ }
}
- if (!WrongInst && !WrongSize)
+
+ if (!Mismatch)
return MatchingNum;
MatchingNum++;
@@ -1061,95 +1251,130 @@ findDuplicateOutputBlock(BasicBlock *OutputBB,
return None;
}
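findDuplicateOutputBlock now compares whole sets of output blocks, keyed by
return value, instead of a single block. A minimal standalone sketch of that
set comparison, with std::map standing in for DenseMap and strings standing in
for instructions (illustrative only):

#include <map>
#include <optional>
#include <string>
#include <vector>

using BlockContents = std::vector<std::string>;
using OutputSet = std::map<int, BlockContents>; // return-value key -> block

// Mirrors the Optional<unsigned> result: index of a matching existing set.
static std::optional<unsigned>
findDuplicateSet(const OutputSet &New, const std::vector<OutputSet> &Existing) {
  for (unsigned Idx = 0; Idx < Existing.size(); ++Idx) {
    bool Mismatch = false;
    for (const auto &KV : Existing[Idx]) {
      auto It = New.find(KV.first);
      // A missing key, or any differing instruction, is a mismatch.
      if (It == New.end() || It->second != KV.second) {
        Mismatch = true;
        break;
      }
    }
    if (!Mismatch)
      return Idx;
  }
  return std::nullopt;
}

int main() {
  std::vector<OutputSet> Existing = {{{0, {"store a"}}, {1, {"store b"}}}};
  OutputSet New = {{0, {"store a"}}, {1, {"store b"}}};
  return findDuplicateSet(New, Existing) ? 0 : 1; // matches set 0
}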
+/// Remove empty output blocks from the outlined region.
+///
+/// \param BlocksToPrune - Mapping of return values to output blocks for the
+/// \p Region.
+/// \param Region - The OutlinableRegion we are analyzing.
+/// \returns true if all of the output blocks were empty and removed.
+static bool
+analyzeAndPruneOutputBlocks(DenseMap<Value *, BasicBlock *> &BlocksToPrune,
+ OutlinableRegion &Region) {
+ bool AllRemoved = true;
+ Value *RetValueForBB;
+ BasicBlock *NewBB;
+ SmallVector<Value *, 4> ToRemove;
+ // Iterate over the output blocks created in the outlined section.
+ for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
+ RetValueForBB = VtoBB.first;
+ NewBB = VtoBB.second;
+
+ // If there are no instructions, we remove it from the module, and also
+ // mark the value for removal from the return value to output block mapping.
+ if (NewBB->size() == 0) {
+ NewBB->eraseFromParent();
+ ToRemove.push_back(RetValueForBB);
+ continue;
+ }
+
+ // Mark that we could not remove all the blocks since they were not all
+ // empty.
+ AllRemoved = false;
+ }
+
+ // Remove the return value from the mapping.
+ for (Value *V : ToRemove)
+ BlocksToPrune.erase(V);
+
+ // Mark the region as having the no output scheme.
+ if (AllRemoved)
+ Region.OutputBlockNum = -1;
+
+ return AllRemoved;
+}
+
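analyzeAndPruneOutputBlocks defers the map erasures until after the loop, since
erasing from a DenseMap while iterating it invalidates the iterator. The same
two-phase pattern in portable C++ (illustrative):

#include <map>
#include <string>
#include <vector>

// Returns true when every block was empty and removed, mirroring the
// AllRemoved flag that selects the "no output" scheme for a region.
static bool pruneEmptyBlocks(std::map<int, std::vector<std::string>> &Blocks) {
  bool AllRemoved = true;
  std::vector<int> ToRemove;
  for (const auto &KV : Blocks) {
    if (KV.second.empty()) {
      ToRemove.push_back(KV.first); // defer erasure until after the loop
      continue;
    }
    AllRemoved = false;
  }
  for (int Key : ToRemove)
    Blocks.erase(Key);
  return AllRemoved;
}

int main() {
  std::map<int, std::vector<std::string>> Blocks = {{0, {}}, {1, {"store"}}};
  return pruneEmptyBlocks(Blocks) ? 1 : 0; // one non-empty block remains
}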
/// For the outlined section, move the needed StoreInsts for the output
/// registers into their own block. Then, determine if there is a duplicate
/// output block already created.
///
/// \param [in] OG - The OutlinableGroup of regions to be outlined.
/// \param [in] Region - The OutlinableRegion that is being analyzed.
-/// \param [in,out] OutputBB - the block that stores for this region will be
+/// \param [in,out] OutputBBs - the blocks that stores for this region will be
/// placed in.
-/// \param [in] EndBB - the final block of the extracted function.
+/// \param [in] EndBBs - the final blocks of the extracted function.
/// \param [in] OutputMappings - OutputMappings the mapping of values that have
/// been replaced by a new output value.
/// \param [in,out] OutputStoreBBs - The existing output blocks.
-static void
-alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region,
- BasicBlock *OutputBB, BasicBlock *EndBB,
- const DenseMap<Value *, Value *> &OutputMappings,
- std::vector<BasicBlock *> &OutputStoreBBs) {
- DenseSet<unsigned> ValuesToFind(Region.GVNStores.begin(),
- Region.GVNStores.end());
-
- // We iterate over the instructions in the extracted function, and find the
- // global value number of the instructions. If we find a value that should
- // be contained in a store, we replace the uses of the value with the value
- // from the overall function, so that the store is storing the correct
- // value from the overall function.
- DenseSet<BasicBlock *> ExcludeBBs(OutputStoreBBs.begin(),
- OutputStoreBBs.end());
- ExcludeBBs.insert(OutputBB);
- std::vector<Instruction *> ExtractedFunctionInsts =
- collectRelevantInstructions(*(Region.ExtractedFunction), ExcludeBBs);
- std::vector<Instruction *> OverallFunctionInsts =
- collectRelevantInstructions(*OG.OutlinedFunction, ExcludeBBs);
-
- assert(ExtractedFunctionInsts.size() == OverallFunctionInsts.size() &&
- "Number of relevant instructions not equal!");
-
- unsigned NumInstructions = ExtractedFunctionInsts.size();
- for (unsigned Idx = 0; Idx < NumInstructions; Idx++) {
- Value *V = ExtractedFunctionInsts[Idx];
-
- if (OutputMappings.find(V) != OutputMappings.end())
- V = OutputMappings.find(V)->second;
- Optional<unsigned> GVN = Region.Candidate->getGVN(V);
-
- // If we have found one of the stored values for output, replace the value
- // with the corresponding one from the overall function.
- if (GVN.hasValue() && ValuesToFind.erase(GVN.getValue())) {
- V->replaceAllUsesWith(OverallFunctionInsts[Idx]);
- if (ValuesToFind.size() == 0)
- break;
- }
-
- if (ValuesToFind.size() == 0)
- break;
- }
-
- assert(ValuesToFind.size() == 0 && "Not all store values were handled!");
-
- // If the size of the block is 0, then there are no stores, and we do not
- // need to save this block.
- if (OutputBB->size() == 0) {
- Region.OutputBlockNum = -1;
- OutputBB->eraseFromParent();
+static void alignOutputBlockWithAggFunc(
+ OutlinableGroup &OG, OutlinableRegion &Region,
+ DenseMap<Value *, BasicBlock *> &OutputBBs,
+ DenseMap<Value *, BasicBlock *> &EndBBs,
+ const DenseMap<Value *, Value *> &OutputMappings,
+ std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
+ // If none of the output blocks have any instructions, we do not have to
+ // determine whether this region matches any of the other output schemes,
+ // and we do not have to do anything else.
+ if (analyzeAndPruneOutputBlocks(OutputBBs, Region))
return;
- }
- // Determine is there is a duplicate block.
+ // Determine if there is a duplicate set of blocks.
Optional<unsigned> MatchingBB =
- findDuplicateOutputBlock(OutputBB, OutputStoreBBs);
+ findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
- // If there is, we remove the new output block. If it does not,
- // we add it to our list of output blocks.
+ // If there is, we remove the new output blocks. If not, we add them to
+ // our list of sets of output blocks.
if (MatchingBB.hasValue()) {
LLVM_DEBUG(dbgs() << "Set output block for region in function"
<< Region.ExtractedFunction << " to "
<< MatchingBB.getValue());
Region.OutputBlockNum = MatchingBB.getValue();
- OutputBB->eraseFromParent();
+ for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
+ VtoBB.second->eraseFromParent();
return;
}
Region.OutputBlockNum = OutputStoreBBs.size();
- LLVM_DEBUG(dbgs() << "Create output block for region in"
- << Region.ExtractedFunction << " to "
- << *OutputBB);
- OutputStoreBBs.push_back(OutputBB);
- BranchInst::Create(EndBB, OutputBB);
+ Value *RetValueForBB;
+ BasicBlock *NewBB;
+ OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
+ for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
+ RetValueForBB = VtoBB.first;
+ NewBB = VtoBB.second;
+ DenseMap<Value *, BasicBlock *>::iterator VBBIt =
+ EndBBs.find(RetValueForBB);
+ LLVM_DEBUG(dbgs() << "Create output block for region in"
+ << Region.ExtractedFunction << " to "
+ << *NewBB);
+ BranchInst::Create(VBBIt->second, NewBB);
+ OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
+ }
+}
+
+/// Takes in a mapping \p OldMap of Constant values to BasicBlocks, sorts the
+/// keys, creates a new BasicBlock for each one, and inserts the pairs into
+/// \p NewMap. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
+///
+/// \param OldMap [in] - The mapping to base the new mapping off of.
+/// \param NewMap [out] - The output mapping using the keys of \p OldMap.
+/// \param ParentFunc [in] - The function to put the new basic block in.
+/// \param BaseName [in] - The start of the BasicBlock names to be appended to
+/// by an index value.
+static void createAndInsertBasicBlocks(DenseMap<Value *, BasicBlock *> &OldMap,
+ DenseMap<Value *, BasicBlock *> &NewMap,
+ Function *ParentFunc, Twine BaseName) {
+ unsigned Idx = 0;
+ std::vector<Value *> SortedKeys;
+
+ getSortedConstantKeys(SortedKeys, OldMap);
+
+ for (Value *RetVal : SortedKeys) {
+ BasicBlock *NewBB = BasicBlock::Create(
+ ParentFunc->getContext(),
+ Twine(BaseName) + Twine("_") + Twine(static_cast<unsigned>(Idx++)),
+ ParentFunc);
+ NewMap.insert(std::make_pair(RetVal, NewBB));
+ }
}
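createAndInsertBasicBlocks sorts the constant keys before creating blocks, so
every region's output blocks get the same "<basename>_<idx>" names in the same
order. A sketch of the naming scheme, with a sorted std::map standing in for
getSortedConstantKeys over a DenseMap (illustrative):

#include <iostream>
#include <map>
#include <string>

int main() {
  // std::map iterates its keys in sorted order, standing in for
  // getSortedConstantKeys over a DenseMap of return values.
  std::map<int, std::string> OldMap = {{2, "end_block_1"}, {0, "end_block_0"}};
  std::map<int, std::string> NewMap;
  unsigned Idx = 0;
  for (const auto &KV : OldMap)
    NewMap[KV.first] = "output_block_1_" + std::to_string(Idx++);
  for (const auto &KV : NewMap) // 0 -> output_block_1_0, 2 -> output_block_1_1
    std::cout << KV.first << " -> " << KV.second << "\n";
  return 0;
}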
/// Create the switch statement for the outlined function to differentiate between
@@ -1159,50 +1384,74 @@ alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region,
/// matches the needed stores for the extracted section.
/// \param [in] M - The module we are outlining from.
/// \param [in] OG - The group of regions to be outlined.
-/// \param [in] EndBB - The final block of the extracted function.
+/// \param [in] EndBBs - The final blocks of the extracted function.
/// \param [in,out] OutputStoreBBs - The existing output blocks.
-void createSwitchStatement(Module &M, OutlinableGroup &OG, BasicBlock *EndBB,
- ArrayRef<BasicBlock *> OutputStoreBBs) {
+void createSwitchStatement(
+ Module &M, OutlinableGroup &OG, DenseMap<Value *, BasicBlock *> &EndBBs,
+ std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
// We only need the switch statement if there is more than one store
// combination.
if (OG.OutputGVNCombinations.size() > 1) {
Function *AggFunc = OG.OutlinedFunction;
- // Create a final block
- BasicBlock *ReturnBlock =
- BasicBlock::Create(M.getContext(), "final_block", AggFunc);
- Instruction *Term = EndBB->getTerminator();
- Term->moveBefore(*ReturnBlock, ReturnBlock->end());
- // Put the switch statement in the old end basic block for the function with
- // a fall through to the new return block
- LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
- << OutputStoreBBs.size() << "\n");
- SwitchInst *SwitchI =
- SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
- ReturnBlock, OutputStoreBBs.size(), EndBB);
-
- unsigned Idx = 0;
- for (BasicBlock *BB : OutputStoreBBs) {
- SwitchI->addCase(ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx),
- BB);
- Term = BB->getTerminator();
- Term->setSuccessor(0, ReturnBlock);
- Idx++;
+ // Create a final block for each different return block.
+ DenseMap<Value *, BasicBlock *> ReturnBBs;
+ createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block");
+
+ for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
+ std::pair<Value *, BasicBlock *> &OutputBlock =
+ *OG.EndBBs.find(RetBlockPair.first);
+ BasicBlock *ReturnBlock = RetBlockPair.second;
+ BasicBlock *EndBB = OutputBlock.second;
+ Instruction *Term = EndBB->getTerminator();
+ // Move the return value to the final block instead of the original exit
+ // stub.
+ Term->moveBefore(*ReturnBlock, ReturnBlock->end());
+ // Put the switch statement in the old end basic block for the function
+ // with a fall through to the new return block.
+ LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
+ << OutputStoreBBs.size() << "\n");
+ SwitchInst *SwitchI =
+ SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
+ ReturnBlock, OutputStoreBBs.size(), EndBB);
+
+ unsigned Idx = 0;
+ for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
+ DenseMap<Value *, BasicBlock *>::iterator OSBBIt =
+ OutputStoreBB.find(OutputBlock.first);
+
+ if (OSBBIt == OutputStoreBB.end())
+ continue;
+
+ BasicBlock *BB = OSBBIt->second;
+ SwitchI->addCase(
+ ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
+ Term = BB->getTerminator();
+ Term->setSuccessor(0, ReturnBlock);
+ Idx++;
+ }
}
return;
}
- // If there needs to be stores, move them from the output block to the end
- // block to save on branching instructions.
+ // If stores are needed, move them from the output blocks to their
+ // corresponding ending blocks.
if (OutputStoreBBs.size() == 1) {
LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
<< *OG.OutlinedFunction << "\n");
- BasicBlock *OutputBlock = OutputStoreBBs[0];
- Instruction *Term = OutputBlock->getTerminator();
- Term->eraseFromParent();
- Term = EndBB->getTerminator();
- moveBBContents(*OutputBlock, *EndBB);
- Term->moveBefore(*EndBB, EndBB->end());
- OutputBlock->eraseFromParent();
+ DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
+ for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
+ DenseMap<Value *, BasicBlock *>::iterator EndBBIt =
+ EndBBs.find(VBPair.first);
+ assert(EndBBIt != EndBBs.end() && "Could not find end block");
+ BasicBlock *EndBB = EndBBIt->second;
+ BasicBlock *OutputBB = VBPair.second;
+ Instruction *Term = OutputBB->getTerminator();
+ Term->eraseFromParent();
+ Term = EndBB->getTerminator();
+ moveBBContents(*OutputBB, *EndBB);
+ Term->moveBefore(*EndBB, EndBB->end());
+ OutputBB->eraseFromParent();
+ }
}
}
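The switch built here dispatches on the aggregate function's final argument:
each outlined region passes its OutputBlockNum, selecting which set of stores
executes before the shared return. The resulting control flow, reduced to a
standalone sketch (illustrative, not the pass's generated IR):

#include <iostream>

// Stand-ins for the per-scheme output blocks feeding one return path.
static int GlobalOut;
static void outputScheme0(int V) { GlobalOut = V; }
static void outputScheme1(int V) { GlobalOut = -V; }

// The aggregate outlined function takes the scheme number as its final
// argument and branches to the matching output block, then falls through
// to the shared final (return) block.
static int outlinedAgg(int A, int B, int SchemeNum) {
  int Result = A + B; // the shared, outlined computation
  switch (SchemeNum) {
  case 0: outputScheme0(Result); break;
  case 1: outputScheme1(Result); break;
  default: break; // OutputBlockNum == -1: region needs no output stores
  }
  return Result;
}

int main() {
  outlinedAgg(1, 2, 1);
  std::cout << GlobalOut << "\n"; // -3
  return 0;
}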
@@ -1217,42 +1466,44 @@ void createSwitchStatement(Module &M, OutlinableGroup &OG, BasicBlock *EndBB,
/// set of stores needed for the different functions.
/// \param [in,out] FuncsToRemove - Extracted functions to erase from module
/// once outlining is complete.
-static void fillOverallFunction(Module &M, OutlinableGroup &CurrentGroup,
- std::vector<BasicBlock *> &OutputStoreBBs,
- std::vector<Function *> &FuncsToRemove) {
+static void fillOverallFunction(
+ Module &M, OutlinableGroup &CurrentGroup,
+ std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
+ std::vector<Function *> &FuncsToRemove) {
OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
// Move first extracted function's instructions into new function.
LLVM_DEBUG(dbgs() << "Move instructions from "
<< *CurrentOS->ExtractedFunction << " to instruction "
<< *CurrentGroup.OutlinedFunction << "\n");
-
- CurrentGroup.EndBB = moveFunctionData(*CurrentOS->ExtractedFunction,
- *CurrentGroup.OutlinedFunction);
+ moveFunctionData(*CurrentOS->ExtractedFunction,
+ *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
// Transfer the attributes from the function to the new function.
- for (Attribute A :
- CurrentOS->ExtractedFunction->getAttributes().getFnAttributes())
+ for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs())
CurrentGroup.OutlinedFunction->addFnAttr(A);
- // Create an output block for the first extracted function.
- BasicBlock *NewBB = BasicBlock::Create(
- M.getContext(), Twine("output_block_") + Twine(static_cast<unsigned>(0)),
- CurrentGroup.OutlinedFunction);
+ // Create a new set of output blocks for the first extracted function.
+ DenseMap<Value *, BasicBlock *> NewBBs;
+ createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
+ CurrentGroup.OutlinedFunction, "output_block_0");
CurrentOS->OutputBlockNum = 0;
- replaceArgumentUses(*CurrentOS, NewBB);
+ replaceArgumentUses(*CurrentOS, NewBBs, true);
replaceConstants(*CurrentOS);
- // If the new basic block has no new stores, we can erase it from the module.
- // It it does, we create a branch instruction to the last basic block from the
- // new one.
- if (NewBB->size() == 0) {
- CurrentOS->OutputBlockNum = -1;
- NewBB->eraseFromParent();
- } else {
- BranchInst::Create(CurrentGroup.EndBB, NewBB);
- OutputStoreBBs.push_back(NewBB);
+ // We first identify any empty output blocks and remove them. We then
+ // create a branch instruction from each non-empty output block to the
+ // corresponding return block of the function.
+ if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) {
+ OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
+ for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
+ DenseMap<Value *, BasicBlock *>::iterator VBBIt =
+ CurrentGroup.EndBBs.find(VToBB.first);
+ BasicBlock *EndBB = VBBIt->second;
+ BranchInst::Create(EndBB, VToBB.second);
+ OutputStoreBBs.back().insert(VToBB);
+ }
}
// Replace the call to the extracted function with the outlined function.
@@ -1268,25 +1519,28 @@ void IROutliner::deduplicateExtractedSections(
std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
createFunction(M, CurrentGroup, OutlinedFunctionNum);
- std::vector<BasicBlock *> OutputStoreBBs;
+ std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
OutlinableRegion *CurrentOS;
fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove);
+ std::vector<Value *> SortedKeys;
for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
CurrentOS = CurrentGroup.Regions[Idx];
AttributeFuncs::mergeAttributesForOutlining(*CurrentGroup.OutlinedFunction,
*CurrentOS->ExtractedFunction);
- // Create a new BasicBlock to hold the needed store instructions.
- BasicBlock *NewBB = BasicBlock::Create(
- M.getContext(), "output_block_" + std::to_string(Idx),
- CurrentGroup.OutlinedFunction);
- replaceArgumentUses(*CurrentOS, NewBB);
+ // Create a set of BasicBlocks, one for each return block, to hold the
+ // needed store instructions.
+ DenseMap<Value *, BasicBlock *> NewBBs;
+ createAndInsertBasicBlocks(
+ CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction,
+ "output_block_" + Twine(static_cast<unsigned>(Idx)));
- alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBB,
- CurrentGroup.EndBB, OutputMappings,
+ replaceArgumentUses(*CurrentOS, NewBBs);
+ alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
+ CurrentGroup.EndBBs, OutputMappings,
OutputStoreBBs);
CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
@@ -1294,11 +1548,78 @@ void IROutliner::deduplicateExtractedSections(
}
// Create a switch statement to handle the different output schemes.
- createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBB, OutputStoreBBs);
+ createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs);
OutlinedFunctionNum++;
}
+/// Checks that the next instruction in the InstructionDataList matches the
+/// next instruction in the module. If they do not match, extra code may have
+/// been inserted, and we must ignore it.
+///
+/// \param ID - The IRInstructionData to check the next instruction of.
+/// \returns true if the InstructionDataList and actual instruction match.
+static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID) {
+ // We check if there is a discrepancy between the InstructionDataList
+ // and the actual next instruction in the module. If there is, it means
+ // that an extra instruction was added, likely by the CodeExtractor.
+
+ // Since we do not have any similarity data about this particular
+ // instruction, we cannot confidently outline it, and must discard this
+ // candidate.
+ IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator());
+ Instruction *NextIDLInst = NextIDIt->Inst;
+ Instruction *NextModuleInst = nullptr;
+ if (!ID.Inst->isTerminator())
+ NextModuleInst = ID.Inst->getNextNonDebugInstruction();
+ else if (NextIDLInst != nullptr)
+ NextModuleInst =
+ &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin();
+
+ if (NextIDLInst && NextIDLInst != NextModuleInst)
+ return false;
+
+ return true;
+}
+
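nextIRInstructionDataMatchesNextInst guards against instructions inserted after
similarity analysis ran (e.g. by the CodeExtractor). A reduced sketch of the
same staleness check, comparing a recorded snapshot against the live sequence
with std types (illustrative):

#include <list>
#include <vector>

// True when the recorded snapshot still matches the live list, i.e. no
// element was inserted behind the analysis's back.
static bool snapshotStillValid(const std::vector<const int *> &Snapshot,
                               const std::list<int> &Live) {
  auto LI = Live.begin();
  for (const int *Recorded : Snapshot) {
    if (LI == Live.end() || Recorded != &*LI)
      return false; // an inserted (or removed) element means stale data
    ++LI;
  }
  return true;
}

int main() {
  std::list<int> Live = {1, 2, 3};
  std::vector<const int *> Snapshot;
  for (int &V : Live)
    Snapshot.push_back(&V);
  bool OkBefore = snapshotStillValid(Snapshot, Live);
  Live.insert(std::next(Live.begin()), 99); // simulate an inserted instruction
  bool OkAfter = snapshotStillValid(Snapshot, Live);
  return (OkBefore && !OkAfter) ? 0 : 1;
}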
+bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
+ const OutlinableRegion &Region) {
+ IRSimilarityCandidate *IRSC = Region.Candidate;
+ unsigned StartIdx = IRSC->getStartIdx();
+ unsigned EndIdx = IRSC->getEndIdx();
+
+ // A check to make sure that we are not about to attempt to outline something
+ // that has already been outlined.
+ for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
+ if (Outlined.contains(Idx))
+ return false;
+
+ // We check if the recorded instruction matches the actual next instruction;
+ // if it does not, we fix it in the InstructionDataList.
+ if (!Region.Candidate->backInstruction()->isTerminator()) {
+ Instruction *NewEndInst =
+ Region.Candidate->backInstruction()->getNextNonDebugInstruction();
+ assert(NewEndInst && "Next instruction is a nullptr?");
+ if (Region.Candidate->end()->Inst != NewEndInst) {
+ IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
+ IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
+ IRInstructionData(*NewEndInst,
+ InstructionClassifier.visit(*NewEndInst), *IDL);
+
+ // Insert the first IRInstructionData of the new region after the
+ // last IRInstructionData of the IRSimilarityCandidate.
+ IDL->insert(Region.Candidate->end(), *NewEndIRID);
+ }
+ }
+
+ return none_of(*IRSC, [this](IRInstructionData &ID) {
+ if (!nextIRInstructionDataMatchesNextInst(ID))
+ return true;
+
+ return !this->InstructionClassifier.visit(ID.Inst);
+ });
+}
+
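The first gate in isCompatibleWithAlreadyOutlinedCode rejects any candidate
whose instruction index range overlaps something already outlined. The overlap
test, as a tiny standalone sketch (illustrative):

#include <set>

// Mirrors the loop over [StartIdx, EndIdx] against the Outlined set.
static bool overlapsOutlined(const std::set<unsigned> &Outlined,
                             unsigned StartIdx, unsigned EndIdx) {
  for (unsigned Idx = StartIdx; Idx <= EndIdx; ++Idx)
    if (Outlined.count(Idx))
      return true;
  return false;
}

int main() {
  std::set<unsigned> Outlined = {10, 11, 12};
  bool Free = !overlapsOutlined(Outlined, 0, 5);
  bool Clash = overlapsOutlined(Outlined, 12, 14);
  return (Free && Clash) ? 0 : 1;
}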
void IROutliner::pruneIncompatibleRegions(
std::vector<IRSimilarityCandidate> &CandidateVec,
OutlinableGroup &CurrentGroup) {
@@ -1310,6 +1631,15 @@ void IROutliner::pruneIncompatibleRegions(
return LHS.getStartIdx() < RHS.getStartIdx();
});
+ IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
+ // Since outlining a call and a branch instruction saves no more space
+ // than outlining only the call instruction, we ignore it as a space saving.
+ if (FirstCandidate.getLength() == 2) {
+ if (isa<CallInst>(FirstCandidate.front()->Inst) &&
+ isa<BranchInst>(FirstCandidate.back()->Inst))
+ return;
+ }
+
unsigned CurrentEndIdx = 0;
for (IRSimilarityCandidate &IRSC : CandidateVec) {
PreviouslyOutlined = false;
@@ -1325,9 +1655,13 @@ void IROutliner::pruneIncompatibleRegions(
if (PreviouslyOutlined)
continue;
- // TODO: If in the future we can outline across BasicBlocks, we will need to
- // check all BasicBlocks contained in the region.
- if (IRSC.getStartBB()->hasAddressTaken())
+ // Check over the instructions, and if the basic block has its address
+ // taken for use somewhere else, we do not outline that block.
+ bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID) {
+ return ID.Inst->getParent()->hasAddressTaken();
+ });
+
+ if (BBHasAddressTaken)
continue;
if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
@@ -1340,16 +1674,9 @@ void IROutliner::pruneIncompatibleRegions(
continue;
bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
- // We check if there is a discrepancy between the InstructionDataList
- // and the actual next instruction in the module. If there is, it means
- // that an extra instruction was added, likely by the CodeExtractor.
-
- // Since we do not have any similarity data about this particular
- // instruction, we cannot confidently outline it, and must discard this
- // candidate.
- if (std::next(ID.getIterator())->Inst !=
- ID.Inst->getNextNonDebugInstruction())
+ if (!nextIRInstructionDataMatchesNextInst(ID))
return true;
+
return !this->InstructionClassifier.visit(ID.Inst);
});
@@ -1416,10 +1743,33 @@ static InstructionCost findCostForOutputBlocks(Module &M,
OutlinableGroup &CurrentGroup,
TargetTransformInfo &TTI) {
InstructionCost OutputCost = 0;
+ unsigned NumOutputBranches = 0;
+
+ IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
+ DenseSet<BasicBlock *> CandidateBlocks;
+ Candidate.getBasicBlocks(CandidateBlocks);
+
+ // Count the number of different output branches that point to blocks outside
+ // of the region.
+ DenseSet<BasicBlock *> FoundBlocks;
+ for (IRInstructionData &ID : Candidate) {
+ if (!isa<BranchInst>(ID.Inst))
+ continue;
+
+ for (Value *V : ID.OperVals) {
+ BasicBlock *BB = static_cast<BasicBlock *>(V);
+ DenseSet<BasicBlock *>::iterator CBIt = CandidateBlocks.find(BB);
+ if (CBIt != CandidateBlocks.end() || FoundBlocks.contains(BB))
+ continue;
+ FoundBlocks.insert(BB);
+ NumOutputBranches++;
+ }
+ }
+
+ CurrentGroup.BranchesToOutside = NumOutputBranches;
for (const ArrayRef<unsigned> &OutputUse :
CurrentGroup.OutputGVNCombinations) {
- IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
for (unsigned GVN : OutputUse) {
Optional<Value *> OV = Candidate.fromGVN(GVN);
assert(OV.hasValue() && "Could not find value for GVN?");
@@ -1434,14 +1784,14 @@ static InstructionCost findCostForOutputBlocks(Module &M,
LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
<< " instructions to cost for output of type "
<< *V->getType() << "\n");
- OutputCost += StoreCost;
+ OutputCost += StoreCost * NumOutputBranches;
}
InstructionCost BranchCost =
TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
<< " a branch instruction\n");
- OutputCost += BranchCost;
+ OutputCost += BranchCost * NumOutputBranches;
}
// If there is more than one output scheme, we must have a comparison and
@@ -1460,7 +1810,7 @@ static InstructionCost findCostForOutputBlocks(Module &M,
LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
<< " instructions for each switch case for each different"
<< " output path in a function\n");
- OutputCost += TotalCost;
+ OutputCost += TotalCost * NumOutputBranches;
}
return OutputCost;
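Because every exit branch out of the region needs its own copy of the output
stores, the cost terms above are all scaled by NumOutputBranches. A worked
sketch of the aggregation (the unit costs are made-up numbers, not real
TargetTransformInfo values):

#include <iostream>
#include <vector>

int main() {
  unsigned NumOutputBranches = 2; // exits leaving the outlined region
  std::vector<unsigned> StoreCosts = {1, 1, 2}; // one per output value
  unsigned BranchCost = 1, SwitchCaseCost = 2;

  unsigned OutputCost = 0;
  for (unsigned StoreCost : StoreCosts)
    OutputCost += StoreCost * NumOutputBranches; // stores replayed per exit
  OutputCost += BranchCost * NumOutputBranches;  // branch back, per exit
  OutputCost += SwitchCaseCost * NumOutputBranches; // only if >1 scheme

  std::cout << "estimated output cost: " << OutputCost << "\n"; // 14
  return 0;
}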
@@ -1548,13 +1898,12 @@ void IROutliner::updateOutputMapping(OutlinableRegion &Region,
bool IROutliner::extractSection(OutlinableRegion &Region) {
SetVector<Value *> ArgInputs, Outputs, SinkCands;
- Region.CE->findInputsOutputs(ArgInputs, Outputs, SinkCands);
-
assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
- assert(Region.FollowBB && "FollowBB for the OutlinableRegion is nullptr!");
+ BasicBlock *InitialStart = Region.StartBB;
Function *OrigF = Region.StartBB->getParent();
CodeExtractorAnalysisCache CEAC(*OrigF);
- Region.ExtractedFunction = Region.CE->extractCodeRegion(CEAC);
+ Region.ExtractedFunction =
+ Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs);
// If the extraction was successful, find the BasicBlock, and reassign the
// OutlinableRegion blocks
@@ -1565,7 +1914,23 @@ bool IROutliner::extractSection(OutlinableRegion &Region) {
return false;
}
- BasicBlock *RewrittenBB = Region.FollowBB->getSinglePredecessor();
+ // Get the block containing the call to the extracted function, and reassign
+ // the blocks as necessary. If the original block still exists, it is because
+ // we ended on a branch instruction, so we move its contents into the
+ // preceding block and assign the previous block correctly.
+ User *InstAsUser = Region.ExtractedFunction->user_back();
+ BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent();
+ Region.PrevBB = RewrittenBB->getSinglePredecessor();
+ assert(Region.PrevBB && "PrevBB is nullptr?");
+ if (Region.PrevBB == InitialStart) {
+ BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
+ Instruction *BI = NewPrev->getTerminator();
+ BI->eraseFromParent();
+ moveBBContents(*InitialStart, *NewPrev);
+ Region.PrevBB = NewPrev;
+ InitialStart->eraseFromParent();
+ }
+
Region.StartBB = RewrittenBB;
Region.EndBB = RewrittenBB;
@@ -1608,6 +1973,7 @@ bool IROutliner::extractSection(OutlinableRegion &Region) {
unsigned IROutliner::doOutline(Module &M) {
// Find the possible similarity sections.
+ InstructionClassifier.EnableBranches = !DisableBranches;
IRSimilarityIdentifier &Identifier = getIRSI(M);
SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
@@ -1622,12 +1988,17 @@ unsigned IROutliner::doOutline(Module &M) {
return LHS[0].getLength() * LHS.size() >
RHS[0].getLength() * RHS.size();
});
+ // Create an OutlinableGroup for each SimilarityCandidate to be used in
+ // each of the following for loops to avoid making an allocator.
+ std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
DenseSet<unsigned> NotSame;
- std::vector<Function *> FuncsToRemove;
+ std::vector<OutlinableGroup *> NegativeCostGroups;
+ std::vector<OutlinableRegion *> OutlinedRegions;
// Iterate over the possible sets of similarity.
+ unsigned PotentialGroupIdx = 0;
for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
- OutlinableGroup CurrentGroup;
+ OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
// Remove entries that were previously outlined
pruneIncompatibleRegions(CandidateVec, CurrentGroup);
@@ -1649,20 +2020,31 @@ unsigned IROutliner::doOutline(Module &M) {
// Create a CodeExtractor for each outlinable region. Identify inputs and
// outputs for each section using the code extractor and create the argument
// types for the Aggregate Outlining Function.
- std::vector<OutlinableRegion *> OutlinedRegions;
+ OutlinedRegions.clear();
for (OutlinableRegion *OS : CurrentGroup.Regions) {
// Break the outlinable region out of its parent BasicBlock into its own
// BasicBlocks (see function implementation).
OS->splitCandidate();
- std::vector<BasicBlock *> BE = {OS->StartBB};
+
+ // There's a chance that when the region is split, extra instructions are
+ // added to the region. This makes the region no longer viable for
+ // outlining, so we ignore it.
+ if (!OS->CandidateSplit)
+ continue;
+
+ SmallVector<BasicBlock *> BE;
+ DenseSet<BasicBlock *> BBSet;
+ OS->Candidate->getBasicBlocks(BBSet, BE);
OS->CE = new (ExtractorAllocator.Allocate())
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
false, "outlined");
findAddInputsOutputs(M, *OS, NotSame);
if (!OS->IgnoreRegion)
OutlinedRegions.push_back(OS);
- else
- OS->reattachCandidate();
+
+ // We recombine the blocks now that we have gathered all the needed
+ // information.
+ OS->reattachCandidate();
}
CurrentGroup.Regions = std::move(OutlinedRegions);
@@ -1675,12 +2057,11 @@ unsigned IROutliner::doOutline(Module &M) {
if (CostModel)
findCostBenefit(M, CurrentGroup);
- // If we are adhering to the cost model, reattach all the candidates
+ // If we are adhering to the cost model, skip those groups where the cost
+ // outweighs the benefits.
if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
- for (OutlinableRegion *OS : CurrentGroup.Regions)
- OS->reattachCandidate();
- OptimizationRemarkEmitter &ORE = getORE(
- *CurrentGroup.Regions[0]->Candidate->getFunction());
+ OptimizationRemarkEmitter &ORE =
+ getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
ORE.emit([&]() {
IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
@@ -1704,12 +2085,70 @@ unsigned IROutliner::doOutline(Module &M) {
continue;
}
+ NegativeCostGroups.push_back(&CurrentGroup);
+ }
+
+ ExtractorAllocator.DestroyAll();
+
+ if (NegativeCostGroups.size() > 1)
+ stable_sort(NegativeCostGroups,
+ [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
+ return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
+ });
+
+ std::vector<Function *> FuncsToRemove;
+ for (OutlinableGroup *CG : NegativeCostGroups) {
+ OutlinableGroup &CurrentGroup = *CG;
+
+ OutlinedRegions.clear();
+ for (OutlinableRegion *Region : CurrentGroup.Regions) {
+ // We check whether our region is compatible with what has already been
+ // outlined, and whether we need to ignore this item.
+ if (!isCompatibleWithAlreadyOutlinedCode(*Region))
+ continue;
+ OutlinedRegions.push_back(Region);
+ }
+
+ if (OutlinedRegions.size() < 2)
+ continue;
+
+ // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
+ // we are still outlining enough regions to make up for the added cost.
+ CurrentGroup.Regions = std::move(OutlinedRegions);
+ if (CostModel) {
+ CurrentGroup.Benefit = 0;
+ CurrentGroup.Cost = 0;
+ findCostBenefit(M, CurrentGroup);
+ if (CurrentGroup.Cost >= CurrentGroup.Benefit)
+ continue;
+ }
+ OutlinedRegions.clear();
+ for (OutlinableRegion *Region : CurrentGroup.Regions) {
+ Region->splitCandidate();
+ if (!Region->CandidateSplit)
+ continue;
+ OutlinedRegions.push_back(Region);
+ }
+
+ CurrentGroup.Regions = std::move(OutlinedRegions);
+ if (CurrentGroup.Regions.size() < 2) {
+ for (OutlinableRegion *R : CurrentGroup.Regions)
+ R->reattachCandidate();
+ continue;
+ }
+
LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
<< " and benefit " << CurrentGroup.Benefit << "\n");
// Create functions out of all the sections, and mark them as outlined.
OutlinedRegions.clear();
for (OutlinableRegion *OS : CurrentGroup.Regions) {
+ SmallVector<BasicBlock *> BE;
+ DenseSet<BasicBlock *> BBSet;
+ OS->Candidate->getBasicBlocks(BBSet, BE);
+ OS->CE = new (ExtractorAllocator.Allocate())
+ CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
+ false, "outlined");
bool FunctionOutlined = extractSection(*OS);
if (FunctionOutlined) {
unsigned StartIdx = OS->Candidate->getStartIdx();
@@ -1767,6 +2206,7 @@ bool IROutliner::run(Module &M) {
}
// Pass Manager Boilerplate
+namespace {
class IROutlinerLegacyPass : public ModulePass {
public:
static char ID;
@@ -1782,6 +2222,7 @@ public:
bool runOnModule(Module &M) override;
};
+} // namespace
bool IROutlinerLegacyPass::runOnModule(Module &M) {
if (skipModule(M))
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp
index 59260af88832..992c2b292e1e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -31,9 +31,11 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InlineOrder.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
@@ -96,9 +98,53 @@ static cl::opt<std::string> CGSCCInlineReplayFile(
"cgscc-inline-replay", cl::init(""), cl::value_desc("filename"),
cl::desc(
"Optimization remarks file containing inline remarks to be replayed "
- "by inlining from cgscc inline remarks."),
+ "by cgscc inlining."),
cl::Hidden);
+static cl::opt<ReplayInlinerSettings::Scope> CGSCCInlineReplayScope(
+ "cgscc-inline-replay-scope",
+ cl::init(ReplayInlinerSettings::Scope::Function),
+ cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function",
+ "Replay on functions that have remarks associated "
+ "with them (default)"),
+ clEnumValN(ReplayInlinerSettings::Scope::Module, "Module",
+ "Replay on the entire module")),
+ cl::desc("Whether inline replay should be applied to the entire "
+ "Module or just the Functions (default) that are present as "
+ "callers in remarks during cgscc inlining."),
+ cl::Hidden);
+
+static cl::opt<ReplayInlinerSettings::Fallback> CGSCCInlineReplayFallback(
+ "cgscc-inline-replay-fallback",
+ cl::init(ReplayInlinerSettings::Fallback::Original),
+ cl::values(
+ clEnumValN(
+ ReplayInlinerSettings::Fallback::Original, "Original",
+ "All decisions not in replay send to original advisor (default)"),
+ clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
+ "AlwaysInline", "All decisions not in replay are inlined"),
+ clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline",
+ "All decisions not in replay are not inlined")),
+ cl::desc(
+ "How cgscc inline replay treats sites that don't come from the replay. "
+ "Original: defers to original advisor, AlwaysInline: inline all sites "
+ "not in replay, NeverInline: inline no sites not in replay"),
+ cl::Hidden);
+
+static cl::opt<CallSiteFormat::Format> CGSCCInlineReplayFormat(
+ "cgscc-inline-replay-format",
+ cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
+ cl::values(
+ clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
+ clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn",
+ "<Line Number>:<Column Number>"),
+ clEnumValN(CallSiteFormat::Format::LineDiscriminator,
+ "LineDiscriminator", "<Line Number>.<Discriminator>"),
+ clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
+ "LineColumnDiscriminator",
+ "<Line Number>:<Column Number>.<Discriminator> (default)")),
+ cl::desc("How cgscc inline replay file is formatted"), cl::Hidden);
+
static cl::opt<bool> InlineEnablePriorityOrder(
"inline-enable-priority-order", cl::Hidden, cl::init(false),
cl::desc("Enable the priority inline order for the inliner"));
@@ -463,7 +509,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
}
++NumInlined;
- emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC);
+ emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC);
// If inlining this function gave us any new call sites, throw them
// onto our worklist to process. They are useful inline candidates.
@@ -661,9 +707,12 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
std::make_unique<DefaultInlineAdvisor>(M, FAM, getInlineParams());
if (!CGSCCInlineReplayFile.empty())
- OwnedAdvisor = std::make_unique<ReplayInlineAdvisor>(
+ OwnedAdvisor = getReplayInlineAdvisor(
M, FAM, M.getContext(), std::move(OwnedAdvisor),
- CGSCCInlineReplayFile,
+ ReplayInlinerSettings{CGSCCInlineReplayFile,
+ CGSCCInlineReplayScope,
+ CGSCCInlineReplayFallback,
+ {CGSCCInlineReplayFormat}},
/*EmitRemarks=*/true);
return *OwnedAdvisor;
@@ -674,153 +723,6 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
return *IAA->getAdvisor();
}
-template <typename T> class InlineOrder {
-public:
- using reference = T &;
- using const_reference = const T &;
-
- virtual ~InlineOrder() {}
-
- virtual size_t size() = 0;
-
- virtual void push(const T &Elt) = 0;
-
- virtual T pop() = 0;
-
- virtual const_reference front() = 0;
-
- virtual void erase_if(function_ref<bool(T)> Pred) = 0;
-
- bool empty() { return !size(); }
-};
-
-template <typename T, typename Container = SmallVector<T, 16>>
-class DefaultInlineOrder : public InlineOrder<T> {
- using reference = T &;
- using const_reference = const T &;
-
-public:
- size_t size() override { return Calls.size() - FirstIndex; }
-
- void push(const T &Elt) override { Calls.push_back(Elt); }
-
- T pop() override {
- assert(size() > 0);
- return Calls[FirstIndex++];
- }
-
- const_reference front() override {
- assert(size() > 0);
- return Calls[FirstIndex];
- }
-
- void erase_if(function_ref<bool(T)> Pred) override {
- Calls.erase(std::remove_if(Calls.begin() + FirstIndex, Calls.end(), Pred),
- Calls.end());
- }
-
-private:
- Container Calls;
- size_t FirstIndex = 0;
-};
-
-class Priority {
-public:
- Priority(int Size) : Size(Size) {}
-
- static bool isMoreDesirable(const Priority &S1, const Priority &S2) {
- return S1.Size < S2.Size;
- }
-
- static Priority evaluate(CallBase *CB) {
- Function *Callee = CB->getCalledFunction();
- return Priority(Callee->getInstructionCount());
- }
-
- int Size;
-};
-
-template <typename PriorityT>
-class PriorityInlineOrder : public InlineOrder<std::pair<CallBase *, int>> {
- using T = std::pair<CallBase *, int>;
- using HeapT = std::pair<CallBase *, PriorityT>;
- using reference = T &;
- using const_reference = const T &;
-
- static bool cmp(const HeapT &P1, const HeapT &P2) {
- return PriorityT::isMoreDesirable(P2.second, P1.second);
- }
-
- // A call site could become less desirable for inlining because of the size
- // growth from prior inlining into the callee. This method is used to lazily
- // update the desirability of a call site if it's decreasing. It is only
- // called on pop() or front(), not every time the desirability changes. When
- // the desirability of the front call site decreases, an updated one would be
- // pushed right back into the heap. For simplicity, those cases where
- // the desirability of a call site increases are ignored here.
- void adjust() {
- bool Changed = false;
- do {
- CallBase *CB = Heap.front().first;
- const PriorityT PreviousGoodness = Heap.front().second;
- const PriorityT CurrentGoodness = PriorityT::evaluate(CB);
- Changed = PriorityT::isMoreDesirable(PreviousGoodness, CurrentGoodness);
- if (Changed) {
- std::pop_heap(Heap.begin(), Heap.end(), cmp);
- Heap.pop_back();
- Heap.push_back({CB, CurrentGoodness});
- std::push_heap(Heap.begin(), Heap.end(), cmp);
- }
- } while (Changed);
- }
-
-public:
- size_t size() override { return Heap.size(); }
-
- void push(const T &Elt) override {
- CallBase *CB = Elt.first;
- const int InlineHistoryID = Elt.second;
- const PriorityT Goodness = PriorityT::evaluate(CB);
-
- Heap.push_back({CB, Goodness});
- std::push_heap(Heap.begin(), Heap.end(), cmp);
- InlineHistoryMap[CB] = InlineHistoryID;
- }
-
- T pop() override {
- assert(size() > 0);
- adjust();
-
- CallBase *CB = Heap.front().first;
- T Result = std::make_pair(CB, InlineHistoryMap[CB]);
- InlineHistoryMap.erase(CB);
- std::pop_heap(Heap.begin(), Heap.end(), cmp);
- Heap.pop_back();
- return Result;
- }
-
- const_reference front() override {
- assert(size() > 0);
- adjust();
-
- CallBase *CB = Heap.front().first;
- return *InlineHistoryMap.find(CB);
- }
-
- void erase_if(function_ref<bool(T)> Pred) override {
- auto PredWrapper = [=](HeapT P) -> bool {
- return Pred(std::make_pair(P.first, 0));
- };
- Heap.erase(std::remove_if(Heap.begin(), Heap.end(), PredWrapper),
- Heap.end());
- std::make_heap(Heap.begin(), Heap.end(), cmp);
- }
-
-private:
- SmallVector<HeapT, 16> Heap;
- DenseMap<CallBase *, int> InlineHistoryMap;
-};
-
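The InlineOrder, DefaultInlineOrder, Priority, and PriorityInlineOrder
machinery deleted here is not lost: the new llvm/Analysis/InlineOrder.h include
above, and the InlineSizePriority use below, indicate it was hoisted into the
Analysis library so the new module inliner can share it. The interesting part
is adjust()'s lazy re-heapify: only the front element is re-scored, and only
when its priority decayed. A standalone sketch of that pattern (illustrative,
std algorithms only; assumes a non-empty heap on pop):

#include <algorithm>
#include <functional>
#include <iostream>
#include <utility>
#include <vector>

// A max-heap keyed by a recomputable "goodness" score. Scores can decay
// (e.g. a callee grew), so the front is lazily re-scored before use.
struct LazyHeap {
  std::vector<std::pair<int, int>> Heap; // {id, cached score}
  std::function<int(int)> Score;         // current score of an id

  static bool Cmp(const std::pair<int, int> &A, const std::pair<int, int> &B) {
    return A.second < B.second;
  }
  void push(int Id) {
    Heap.push_back({Id, Score(Id)});
    std::push_heap(Heap.begin(), Heap.end(), Cmp);
  }
  // Re-evaluate the front; if it got worse, reinsert with the new score.
  void adjust() {
    while (true) {
      auto [Id, Cached] = Heap.front();
      int Current = Score(Id);
      if (Current >= Cached)
        return; // still as good as advertised
      std::pop_heap(Heap.begin(), Heap.end(), Cmp);
      Heap.back().second = Current;
      std::push_heap(Heap.begin(), Heap.end(), Cmp);
    }
  }
  int pop() {
    adjust();
    int Id = Heap.front().first;
    std::pop_heap(Heap.begin(), Heap.end(), Cmp);
    Heap.pop_back();
    return Id;
  }
};

int main() {
  std::vector<int> Sizes = {5, 3, 9}; // smaller is better in the real pass
  LazyHeap H;
  H.Score = [&](int Id) { return -Sizes[Id]; }; // negate: prefer small sizes
  for (int Id = 0; Id < 3; ++Id)
    H.push(Id);
  Sizes[1] = 20; // callee 1 grew after an earlier inline
  std::cout << H.pop() << "\n"; // now 0 (size 5), not 1
  return 0;
}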
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
CGSCCAnalysisManager &AM, LazyCallGraph &CG,
CGSCCUpdateResult &UR) {
@@ -868,7 +770,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// incrementally making a single function grow in a super linear fashion.
std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
if (InlineEnablePriorityOrder)
- Calls = std::make_unique<PriorityInlineOrder<Priority>>();
+ Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>();
else
Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
assert(Calls != nullptr && "Expected an initialized InlineOrder");
@@ -972,8 +874,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
continue;
}
- auto Advice = Advisor.getAdvice(*CB, OnlyMandatory);
+ std::unique_ptr<InlineAdvice> Advice =
+ Advisor.getAdvice(*CB, OnlyMandatory);
+
// Check whether we want to inline this callsite.
+ if (!Advice)
+ continue;
+
if (!Advice->isInliningRecommended()) {
Advice->recordUnattemptedInlining();
continue;
@@ -1104,6 +1011,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
UR.InlinedInternalEdges.insert({&N, OldC});
}
InlinedCallees.clear();
+
+ // Invalidate analyses for this function now so that we don't have to
+ // invalidate analyses for all functions in this SCC later.
+ FAM.invalidate(F, PreservedAnalyses::none());
}
// Now that we've finished inlining all of the calls across this SCC, delete
@@ -1147,10 +1058,12 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (!Changed)
return PreservedAnalyses::all();
+ PreservedAnalyses PA;
// Even if we change the IR, we update the core CGSCC data structures and so
// can preserve the proxy to the function analysis manager.
- PreservedAnalyses PA;
PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ // We have already invalidated all analyses on modified functions.
+ PA.preserveSet<AllAnalysesOn<Function>>();
return PA;
}
@@ -1173,7 +1086,11 @@ ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params,
PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
ModuleAnalysisManager &MAM) {
auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M);
- if (!IAA.tryCreate(Params, Mode, CGSCCInlineReplayFile)) {
+ if (!IAA.tryCreate(Params, Mode,
+ {CGSCCInlineReplayFile,
+ CGSCCInlineReplayScope,
+ CGSCCInlineReplayFallback,
+ {CGSCCInlineReplayFormat}})) {
M.getContext().emitError(
"Could not setup Inlining Advisor for the requested "
"mode and/or options");
@@ -1192,10 +1109,39 @@ PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
else
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
createDevirtSCCRepeatedPass(std::move(PM), MaxDevirtIterations)));
+
+ MPM.addPass(std::move(AfterCGMPM));
MPM.run(M, MAM);
- IAA.clear();
+ // Discard the InlineAdvisor; a subsequent inlining session should construct
+ // its own.
+ auto PA = PreservedAnalyses::all();
+ PA.abandon<InlineAdvisorAnalysis>();
+ return PA;
+}
- // The ModulePassManager has already taken care of invalidating analyses.
- return PreservedAnalyses::all();
+void InlinerPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<InlinerPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ if (OnlyMandatory)
+ OS << "<only-mandatory>";
+}
+
+void ModuleInlinerWrapperPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ // Print some info about passes added to the wrapper. This is, however,
+ // incomplete, as the InlineAdvisorAnalysis part isn't included (it also
+ // depends on Params and Mode).
+ if (!MPM.isEmpty()) {
+ MPM.printPipeline(OS, MapClassName2PassName);
+ OS << ",";
+ }
+ OS << "cgscc(";
+ if (MaxDevirtIterations != 0)
+ OS << "devirt<" << MaxDevirtIterations << ">(";
+ PM.printPipeline(OS, MapClassName2PassName);
+ if (MaxDevirtIterations != 0)
+ OS << ")";
+ OS << ")";
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Internalize.cpp
index db3b4384ce67..692e445cb7cb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -201,21 +201,6 @@ bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
AlwaysPreserved.insert(V->getName());
}
- // Mark all functions not in the api as internal.
- IsWasm = Triple(M.getTargetTriple()).isOSBinFormatWasm();
- for (Function &I : M) {
- if (!maybeInternalize(I, ComdatMap))
- continue;
- Changed = true;
-
- if (ExternalNode)
- // Remove a callgraph edge from the external node to this function.
- ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
-
- ++NumFunctions;
- LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
- }
-
// Never internalize the llvm.used symbol. It is used to implement
// attribute((used)).
// FIXME: Shouldn't this just filter on llvm.metadata section??
@@ -237,6 +222,21 @@ bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
else
AlwaysPreserved.insert("__stack_chk_guard");
+ // Mark all functions not in the API as internal.
+ IsWasm = Triple(M.getTargetTriple()).isOSBinFormatWasm();
+ for (Function &I : M) {
+ if (!maybeInternalize(I, ComdatMap))
+ continue;
+ Changed = true;
+
+ if (ExternalNode)
+ // Remove a callgraph edge from the external node to this function.
+ ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
+
+ ++NumFunctions;
+ LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
+ }
+
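This hunk pair in Internalize.cpp is a pure reordering: the
function-internalization sweep now runs only after AlwaysPreserved has been
fully populated (llvm.used entries, the stack-protector names, and so on), so
a symbol added late to the preserved set can no longer be internalized first.
A schematic of why the order matters (not the pass's real logic):

#include <iostream>
#include <map>
#include <set>
#include <string>

int main() {
  std::map<std::string, bool> Internalized = {{"foo", false},
                                              {"__stack_chk_guard", false}};
  std::set<std::string> AlwaysPreserved;
  // The reordered pass fills the preserved set first...
  AlwaysPreserved.insert("__stack_chk_guard");
  // ...so the internalization sweep sees the complete set.
  for (auto &KV : Internalized)
    if (!AlwaysPreserved.count(KV.first))
      KV.second = true;
  for (const auto &KV : Internalized)
    std::cout << KV.first << (KV.second ? ": internal\n" : ": preserved\n");
  return 0;
}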
// Mark all global variables with initializers that are not in the API as
// internal as well.
for (auto &GV : M.globals()) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index a497c0390bce..d9a59dd35fde 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -283,3 +283,13 @@ PreservedAnalyses LoopExtractorPass::run(Module &M, ModuleAnalysisManager &AM) {
PA.preserve<LoopAnalysis>();
return PA;
}
+
+void LoopExtractorPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<LoopExtractorPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (NumLoops == 1)
+ OS << "single";
+ OS << ">";
+}
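printPipeline overrides like this one let the new pass manager render a pass
and its options back into pipeline-text form (usable with opt's -passes),
producing "loop-extract<single>" when NumLoops == 1. A reduced sketch of the
pattern; the string-returning harness is invented for illustration, and the
"loop-extract" name is the pass's registered name rather than anything shown
in this diff:

#include <iostream>
#include <sstream>
#include <string>

// Mimics LoopExtractorPass::printPipeline: the pass name, then an
// angle-bracketed option list.
static std::string printLoopExtractorPipeline(unsigned NumLoops) {
  std::ostringstream OS;
  OS << "loop-extract";
  OS << "<";
  if (NumLoops == 1)
    OS << "single";
  OS << ">";
  return OS.str();
}

int main() {
  std::cout << printLoopExtractorPipeline(1) << "\n"; // loop-extract<single>
  std::cout << printLoopExtractorPipeline(8) << "\n"; // loop-extract<>
  return 0;
}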
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index b492b200c6d5..f78971f0e586 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -342,7 +342,8 @@ private:
struct ScopedSaveAliaseesAndUsed {
Module &M;
SmallVector<GlobalValue *, 4> Used, CompilerUsed;
- std::vector<std::pair<GlobalIndirectSymbol *, Function *>> FunctionAliases;
+ std::vector<std::pair<GlobalAlias *, Function *>> FunctionAliases;
+ std::vector<std::pair<GlobalIFunc *, Function *>> ResolverIFuncs;
ScopedSaveAliaseesAndUsed(Module &M) : M(M) {
// The users of this class want to replace all function references except
@@ -362,13 +363,16 @@ struct ScopedSaveAliaseesAndUsed {
if (GlobalVariable *GV = collectUsedGlobalVariables(M, CompilerUsed, true))
GV->eraseFromParent();
- for (auto &GIS : concat<GlobalIndirectSymbol>(M.aliases(), M.ifuncs())) {
+ for (auto &GA : M.aliases()) {
// FIXME: This should look past all aliases not just interposable ones,
// see discussion on D65118.
- if (auto *F =
- dyn_cast<Function>(GIS.getIndirectSymbol()->stripPointerCasts()))
- FunctionAliases.push_back({&GIS, F});
+ if (auto *F = dyn_cast<Function>(GA.getAliasee()->stripPointerCasts()))
+ FunctionAliases.push_back({&GA, F});
}
+
+ for (auto &GI : M.ifuncs())
+ if (auto *F = dyn_cast<Function>(GI.getResolver()->stripPointerCasts()))
+ ResolverIFuncs.push_back({&GI, F});
}
~ScopedSaveAliaseesAndUsed() {
@@ -376,8 +380,15 @@ struct ScopedSaveAliaseesAndUsed {
appendToCompilerUsed(M, CompilerUsed);
for (auto P : FunctionAliases)
- P.first->setIndirectSymbol(
+ P.first->setAliasee(
ConstantExpr::getBitCast(P.second, P.first->getType()));
+
+ for (auto P : ResolverIFuncs) {
+ // This does not preserve pointer casts that may have been stripped by the
+ // constructor, but the resolver's type is different from that of the
+ // ifunc anyway.
+ P.first->setResolver(P.second);
+ }
}
};
@@ -1550,17 +1561,28 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
ConstantInt::get(IntPtrTy, I)}),
F->getType());
- if (Functions[I]->isExported()) {
- if (IsJumpTableCanonical) {
- ExportSummary->cfiFunctionDefs().insert(std::string(F->getName()));
- } else {
- GlobalAlias *JtAlias = GlobalAlias::create(
- F->getValueType(), 0, GlobalValue::ExternalLinkage,
- F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M);
+
+ const bool IsExported = Functions[I]->isExported();
+ if (!IsJumpTableCanonical) {
+ GlobalValue::LinkageTypes LT = IsExported
+ ? GlobalValue::ExternalLinkage
+ : GlobalValue::InternalLinkage;
+ GlobalAlias *JtAlias = GlobalAlias::create(F->getValueType(), 0, LT,
+ F->getName() + ".cfi_jt",
+ CombinedGlobalElemPtr, &M);
+ if (IsExported)
JtAlias->setVisibility(GlobalValue::HiddenVisibility);
+ else
+ appendToUsed(M, {JtAlias});
+ }
+
+ if (IsExported) {
+ if (IsJumpTableCanonical)
+ ExportSummary->cfiFunctionDefs().insert(std::string(F->getName()));
+ else
ExportSummary->cfiFunctionDecls().insert(std::string(F->getName()));
- }
}
+
if (!IsJumpTableCanonical) {
if (F->hasExternalWeakLinkage())
replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr,
@@ -1751,11 +1773,7 @@ static bool isDirectCall(Use& U) {
void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New,
bool IsJumpTableCanonical) {
SmallSetVector<Constant *, 4> Constants;
- auto UI = Old->use_begin(), E = Old->use_end();
- for (; UI != E;) {
- Use &U = *UI;
- ++UI;
-
+ for (Use &U : llvm::make_early_inc_range(Old->uses())) {
// Skip block addresses
if (isa<BlockAddress>(U.getUser()))
continue;
@@ -1792,12 +1810,11 @@ bool LowerTypeTestsModule::lower() {
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
if (DropTypeTests && TypeTestFunc) {
- for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
- UI != UE;) {
- auto *CI = cast<CallInst>((*UI++).getUser());
+ for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
+ auto *CI = cast<CallInst>(U.getUser());
// Find and erase llvm.assume intrinsics for this llvm.type.test call.
- for (auto CIU = CI->use_begin(), CIUE = CI->use_end(); CIU != CIUE;)
- if (auto *Assume = dyn_cast<AssumeInst>((*CIU++).getUser()))
+ for (Use &CIU : llvm::make_early_inc_range(CI->uses()))
+ if (auto *Assume = dyn_cast<AssumeInst>(CIU.getUser()))
Assume->eraseFromParent();
// If the assume was merged with another assume, we might have a use on a
// phi (which will feed the assume). Simply replace the use on the phi
@@ -1835,13 +1852,9 @@ bool LowerTypeTestsModule::lower() {
return false;
if (ImportSummary) {
- if (TypeTestFunc) {
- for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
- UI != UE;) {
- auto *CI = cast<CallInst>((*UI++).getUser());
- importTypeTest(CI);
- }
- }
+ if (TypeTestFunc)
+ for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses()))
+ importTypeTest(cast<CallInst>(U.getUser()));
if (ICallBranchFunnelFunc && !ICallBranchFunnelFunc->use_empty())
report_fatal_error(
@@ -2100,11 +2113,11 @@ bool LowerTypeTestsModule::lower() {
auto CI = cast<CallInst>(U.getUser());
std::vector<GlobalTypeMember *> Targets;
- if (CI->getNumArgOperands() % 2 != 1)
+ if (CI->arg_size() % 2 != 1)
report_fatal_error("number of arguments should be odd");
GlobalClassesTy::member_iterator CurSet;
- for (unsigned I = 1; I != CI->getNumArgOperands(); I += 2) {
+ for (unsigned I = 1; I != CI->arg_size(); I += 2) {
int64_t Offset;
auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
CI->getOperand(I), Offset, M.getDataLayout()));
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 9e6dd879ac01..97ef872c5499 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -463,17 +463,15 @@ bool MergeFunctions::runOnModule(Module &M) {
// Replace direct callers of Old with New.
void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
- for (auto UI = Old->use_begin(), UE = Old->use_end(); UI != UE;) {
- Use *U = &*UI;
- ++UI;
- CallBase *CB = dyn_cast<CallBase>(U->getUser());
- if (CB && CB->isCallee(U)) {
+ for (Use &U : llvm::make_early_inc_range(Old->uses())) {
+ CallBase *CB = dyn_cast<CallBase>(U.getUser());
+ if (CB && CB->isCallee(&U)) {
// Do not copy attributes from the called function to the call-site.
// Function comparison ensures that the attributes are the same up to
// type congruences in byval(), in which case we need to keep the byval
// type of the call-site, not the callee function.
remove(CB->getFunction());
- U->set(BitcastNew);
+ U.set(BitcastNew);
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ModuleInliner.cpp
new file mode 100644
index 000000000000..ebf080e87c3b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ModuleInliner.cpp
@@ -0,0 +1,354 @@
+//===- ModuleInliner.cpp - Code related to module inliner -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the mechanics required to implement inlining without
+// missing any calls at the module level. Unlike the SCC inliner, it does not
+// need any information about SCCs or the call graph. The decisions of which
+// calls are profitable to inline are implemented elsewhere.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ModuleInliner.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InlineAdvisor.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InlineOrder.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <cassert>
+#include <functional>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "module-inline"
+
+STATISTIC(NumInlined, "Number of functions inlined");
+STATISTIC(NumDeleted, "Number of functions deleted because all callers were found");
+
+static cl::opt<bool> InlineEnablePriorityOrder(
+ "module-inline-enable-priority-order", cl::Hidden, cl::init(true),
+ cl::desc("Enable the priority inline order for the module inliner"));
+
+/// Return true if the specified inline history ID
+/// indicates an inline history that includes the specified function.
+static bool inlineHistoryIncludes(
+ Function *F, int InlineHistoryID,
+ const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
+ while (InlineHistoryID != -1) {
+ assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+ "Invalid inline history ID");
+ if (InlineHistory[InlineHistoryID].first == F)
+ return true;
+ InlineHistoryID = InlineHistory[InlineHistoryID].second;
+ }
+ return false;
+}
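
The inline history is a parent-index forest: each entry pairs an inlined callee with the index of the entry it was inlined under, and -1 marks a chain root. A minimal standalone sketch of the same walk, with strings standing in for Function pointers (names and types here are illustrative, not LLVM API):

    #include <cassert>
    #include <string>
    #include <utility>
    #include <vector>

    // Each entry: (inlined callee, index of the entry it was inlined under).
    using History = std::vector<std::pair<std::string, int>>;

    static bool historyIncludes(const History &H, int ID, const std::string &F) {
      while (ID != -1) {        // Walk the chain toward its root.
        if (H[ID].first == F)
          return true;          // F already appears on this inlining chain.
        ID = H[ID].second;
      }
      return false;
    }

    int main() {
      History H;
      H.push_back({"A", -1});              // A inlined at a root call site.
      H.push_back({"B", 0});               // B inlined out of A's inlined body.
      assert(historyIncludes(H, 1, "A"));  // Re-inlining A here would recurse.
      assert(!historyIncludes(H, 1, "C")); // C is not on this chain.
      return 0;
    }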
+
+InlineAdvisor &ModuleInlinerPass::getAdvisor(const ModuleAnalysisManager &MAM,
+ FunctionAnalysisManager &FAM,
+ Module &M) {
+ if (OwnedAdvisor)
+ return *OwnedAdvisor;
+
+ auto *IAA = MAM.getCachedResult<InlineAdvisorAnalysis>(M);
+ if (!IAA) {
+ // It should still be possible to run the inliner as a stand-alone module
+ // pass, for test scenarios. In that case, we default to the
+ // DefaultInlineAdvisor, which doesn't need to keep state between module
+ // pass runs. It also uses just the default InlineParams. In this case, we
+ // need to use the provided FAM, which is valid for the duration of the
+ // inliner pass, and thus the lifetime of the owned advisor. The one we
+ // would get from the MAM can be invalidated as a result of the inliner's
+ // activity.
+ OwnedAdvisor = std::make_unique<DefaultInlineAdvisor>(M, FAM, Params);
+
+ return *OwnedAdvisor;
+ }
+  assert(IAA->getAdvisor() &&
+         "Expected a present InlineAdvisorAnalysis to also have an "
+         "InlineAdvisor initialized");
+ return *IAA->getAdvisor();
+}
+
+static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) {
+ LibFunc LF;
+
+ // Either this is a normal library function or a "vectorizable"
+ // function. Not using the VFDatabase here because this query
+ // is related only to libraries handled via the TLI.
+ return TLI.getLibFunc(F, LF) ||
+ TLI.isKnownVectorFunctionInLibrary(F.getName());
+}
+
+PreservedAnalyses ModuleInlinerPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ LLVM_DEBUG(dbgs() << "---- Module Inliner is Running ---- \n");
+
+ auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M);
+ if (!IAA.tryCreate(Params, Mode, {})) {
+    M.getContext().emitError(
+        "Could not set up the Inlining Advisor for the requested "
+        "mode and/or options");
+ return PreservedAnalyses::all();
+ }
+
+ bool Changed = false;
+
+ ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);
+
+ FunctionAnalysisManager &FAM =
+ MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+
+ InlineAdvisor &Advisor = getAdvisor(MAM, FAM, M);
+ Advisor.onPassEntry();
+
+ auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(); });
+
+  // In the module inliner, a priority-based worklist is used for calls across
+  // the entire module. With this module inliner, the inline order is no
+  // longer limited to bottom-up order; a more globally scoped inline order is
+  // enabled. The inline deferral logic also becomes unnecessary in this
+  // module inliner. It is possible to use other priority heuristics, e.g. a
+  // profile-based heuristic.
+  //
+  // TODO: There is a large amount of code duplicated between the module
+  // inliner and the SCC inliner, which needs refactoring.
+ std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
+ if (InlineEnablePriorityOrder)
+ Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>();
+ else
+ Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
+ assert(Calls != nullptr && "Expected an initialized InlineOrder");
+
+ // Populate the initial list of calls in this module.
+ for (Function &F : M) {
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ // We want to generally process call sites top-down in order for
+ // simplifications stemming from replacing the call with the returned value
+ // after inlining to be visible to subsequent inlining decisions.
+    // FIXME: Using the instruction sequence is a really bad way to do this.
+ // Instead we should do an actual RPO walk of the function body.
+ for (Instruction &I : instructions(F))
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (Function *Callee = CB->getCalledFunction()) {
+ if (!Callee->isDeclaration())
+ Calls->push({CB, -1});
+ else if (!isa<IntrinsicInst>(I)) {
+ using namespace ore;
+ setInlineRemark(*CB, "unavailable definition");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
+ << NV("Callee", Callee) << " will not be inlined into "
+ << NV("Caller", CB->getCaller())
+ << " because its definition is unavailable"
+ << setIsVerbose();
+ });
+ }
+ }
+ }
+ if (Calls->empty())
+ return PreservedAnalyses::all();
+
+ // When inlining a callee produces new call sites, we want to keep track of
+ // the fact that they were inlined from the callee. This allows us to avoid
+ // infinite inlining in some obscure cases. To represent this, we use an
+ // index into the InlineHistory vector.
+ SmallVector<std::pair<Function *, int>, 16> InlineHistory;
+
+ // Track a set vector of inlined callees so that we can augment the caller
+ // with all of their edges in the call graph before pruning out the ones that
+ // got simplified away.
+ SmallSetVector<Function *, 4> InlinedCallees;
+
+ // Track the dead functions to delete once finished with inlining calls. We
+ // defer deleting these to make it easier to handle the call graph updates.
+ SmallVector<Function *, 4> DeadFunctions;
+
+ // Loop forward over all of the calls.
+ while (!Calls->empty()) {
+ // We expect the calls to typically be batched with sequences of calls that
+ // have the same caller, so we first set up some shared infrastructure for
+ // this caller. We also do any pruning we can at this layer on the caller
+ // alone.
+ Function &F = *Calls->front().first->getCaller();
+
+ LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n"
+ << " Function size: " << F.getInstructionCount()
+ << "\n");
+
+ auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+
+ // Now process as many calls as we have within this caller in the sequence.
+ // We bail out as soon as the caller has to change so we can
+ // prepare the context of that new caller.
+ bool DidInline = false;
+ while (!Calls->empty() && Calls->front().first->getCaller() == &F) {
+ auto P = Calls->pop();
+ CallBase *CB = P.first;
+ const int InlineHistoryID = P.second;
+ Function &Callee = *CB->getCalledFunction();
+
+ if (InlineHistoryID != -1 &&
+ inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
+ setInlineRemark(*CB, "recursive");
+ continue;
+ }
+
+ auto Advice = Advisor.getAdvice(*CB, /*OnlyMandatory*/ false);
+ // Check whether we want to inline this callsite.
+ if (!Advice->isInliningRecommended()) {
+ Advice->recordUnattemptedInlining();
+ continue;
+ }
+
+      // Set up the data structure used to plumb customization into the
+ // `InlineFunction` routine.
+ InlineFunctionInfo IFI(
+ /*cg=*/nullptr, GetAssumptionCache, PSI,
+ &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
+ &FAM.getResult<BlockFrequencyAnalysis>(Callee));
+
+ InlineResult IR =
+ InlineFunction(*CB, IFI, &FAM.getResult<AAManager>(*CB->getCaller()));
+ if (!IR.isSuccess()) {
+ Advice->recordUnsuccessfulInlining(IR);
+ continue;
+ }
+
+ DidInline = true;
+ InlinedCallees.insert(&Callee);
+ ++NumInlined;
+
+ LLVM_DEBUG(dbgs() << " Size after inlining: "
+ << F.getInstructionCount() << "\n");
+
+ // Add any new callsites to defined functions to the worklist.
+ if (!IFI.InlinedCallSites.empty()) {
+ int NewHistoryID = InlineHistory.size();
+ InlineHistory.push_back({&Callee, InlineHistoryID});
+
+ for (CallBase *ICB : reverse(IFI.InlinedCallSites)) {
+ Function *NewCallee = ICB->getCalledFunction();
+ if (!NewCallee) {
+ // Try to promote an indirect (virtual) call without waiting for
+ // the post-inline cleanup and the next DevirtSCCRepeatedPass
+ // iteration because the next iteration may not happen and we may
+ // miss inlining it.
+ if (tryPromoteCall(*ICB))
+ NewCallee = ICB->getCalledFunction();
+ }
+ if (NewCallee)
+ if (!NewCallee->isDeclaration())
+ Calls->push({ICB, NewHistoryID});
+ }
+ }
+
+ // Merge the attributes based on the inlining.
+ AttributeFuncs::mergeAttributesForInlining(F, Callee);
+
+ // For local functions, check whether this makes the callee trivially
+ // dead. In that case, we can drop the body of the function eagerly
+ // which may reduce the number of callers of other functions to one,
+ // changing inline cost thresholds.
+ bool CalleeWasDeleted = false;
+ if (Callee.hasLocalLinkage()) {
+ // To check this we also need to nuke any dead constant uses (perhaps
+ // made dead by this operation on other functions).
+ Callee.removeDeadConstantUsers();
+ if (Callee.use_empty() && !isKnownLibFunction(Callee, GetTLI(Callee))) {
+ Calls->erase_if([&](const std::pair<CallBase *, int> &Call) {
+ return Call.first->getCaller() == &Callee;
+ });
+ // Clear the body and queue the function itself for deletion when we
+ // finish inlining.
+ // Note that after this point, it is an error to do anything other
+ // than use the callee's address or delete it.
+ Callee.dropAllReferences();
+          assert(!is_contained(DeadFunctions, &Callee) &&
+                 "Cannot cause a function to become dead twice!");
+ DeadFunctions.push_back(&Callee);
+ CalleeWasDeleted = true;
+ }
+ }
+ if (CalleeWasDeleted)
+ Advice->recordInliningWithCalleeDeleted();
+ else
+ Advice->recordInlining();
+ }
+
+ if (!DidInline)
+ continue;
+ Changed = true;
+
+ InlinedCallees.clear();
+ }
+
+ // Now that we've finished inlining all of the calls across this module,
+ // delete all of the trivially dead functions.
+ //
+ // Note that this walks a pointer set which has non-deterministic order but
+ // that is OK as all we do is delete things and add pointers to unordered
+ // sets.
+ for (Function *DeadF : DeadFunctions) {
+ // Clear out any cached analyses.
+ FAM.clear(*DeadF, DeadF->getName());
+
+ // And delete the actual function from the module.
+ // The Advisor may use Function pointers to efficiently index various
+ // internal maps, e.g. for memoization. Function cleanup passes like
+ // argument promotion create new functions. It is possible for a new
+ // function to be allocated at the address of a deleted function. We could
+ // index using names, but that's inefficient. Alternatively, we let the
+ // Advisor free the functions when it sees fit.
+ DeadF->getBasicBlockList().clear();
+ M.getFunctionList().remove(DeadF);
+
+ ++NumDeleted;
+ }
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
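
For test scenarios the pass can also be scheduled directly. A minimal sketch, assuming the usual new-pass-manager boilerplate and the upstream constructor defaults (with opt, this should correspond to the module-inline pipeline name, assuming it is registered in this LLVM version):

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/IPO/ModuleInliner.h"

    // Minimal sketch: default InlineParams and advisor mode are assumed; the
    // module and function analysis managers must be registered and
    // cross-wired through PassBuilder as usual before MPM is run.
    void addModuleInliner(llvm::ModulePassManager &MPM) {
      MPM.addPass(llvm::ModuleInlinerPass());
    }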
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 75eec25f5807..f342c35fa283 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -33,6 +34,8 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
@@ -41,6 +44,8 @@
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include <algorithm>
+
using namespace llvm;
using namespace omp;
@@ -72,6 +77,46 @@ static cl::opt<bool> HideMemoryTransferLatency(
" transfers"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> DisableOpenMPOptDeglobalization(
+ "openmp-opt-disable-deglobalization", cl::ZeroOrMore,
+ cl::desc("Disable OpenMP optimizations involving deglobalization."),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> DisableOpenMPOptSPMDization(
+ "openmp-opt-disable-spmdization", cl::ZeroOrMore,
+ cl::desc("Disable OpenMP optimizations involving SPMD-ization."),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> DisableOpenMPOptFolding(
+ "openmp-opt-disable-folding", cl::ZeroOrMore,
+ cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
+ "openmp-opt-disable-state-machine-rewrite", cl::ZeroOrMore,
+ cl::desc("Disable OpenMP optimizations that replace the state machine."),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> PrintModuleAfterOptimizations(
+ "openmp-opt-print-module", cl::ZeroOrMore,
+ cl::desc("Print the current module after OpenMP optimizations."),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> AlwaysInlineDeviceFunctions(
+ "openmp-opt-inline-device", cl::ZeroOrMore,
+ cl::desc("Inline all applicible functions on the device."), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool>
+ EnableVerboseRemarks("openmp-opt-verbose-remarks", cl::ZeroOrMore,
+ cl::desc("Enables more verbose remarks."), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<unsigned>
+ SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden,
+ cl::desc("Maximal number of attributor iterations."),
+ cl::init(256));
+
STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
"Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPParallelRegionsDeleted,
@@ -328,7 +373,7 @@ struct OMPInformationCache : public InformationCache {
if (F->arg_size() != RTFArgTypes.size())
return false;
- auto RTFTyIt = RTFArgTypes.begin();
+ auto *RTFTyIt = RTFArgTypes.begin();
for (Argument &Arg : F->args()) {
if (Arg.getType() != *RTFTyIt)
return false;
@@ -503,7 +548,7 @@ struct KernelInfoState : AbstractState {
/// State to track if we are in SPMD-mode, assumed or known, and why we decided
/// we cannot be. If it is assumed, then RequiresFullRuntime should also be
/// false.
- BooleanStateWithPtrSetVector<Instruction> SPMDCompatibilityTracker;
+ BooleanStateWithPtrSetVector<Instruction, false> SPMDCompatibilityTracker;
/// The __kmpc_target_init call in this kernel, if any. If we find more than
/// one we abort as the kernel is malformed.
@@ -542,7 +587,9 @@ struct KernelInfoState : AbstractState {
/// See AbstractState::indicatePessimisticFixpoint(...)
ChangeStatus indicatePessimisticFixpoint() override {
IsAtFixpoint = true;
+ ReachingKernelEntries.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ ReachedKnownParallelRegions.indicatePessimisticFixpoint();
ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
return ChangeStatus::CHANGED;
}
@@ -550,6 +597,10 @@ struct KernelInfoState : AbstractState {
/// See AbstractState::indicateOptimisticFixpoint(...)
ChangeStatus indicateOptimisticFixpoint() override {
IsAtFixpoint = true;
+ ReachingKernelEntries.indicateOptimisticFixpoint();
+ SPMDCompatibilityTracker.indicateOptimisticFixpoint();
+ ReachedKnownParallelRegions.indicateOptimisticFixpoint();
+ ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
@@ -569,6 +620,12 @@ struct KernelInfoState : AbstractState {
return true;
}
+ /// Returns true if this kernel contains any OpenMP parallel regions.
+ bool mayContainParallelRegion() {
+ return !ReachedKnownParallelRegions.empty() ||
+ !ReachedUnknownParallelRegions.empty();
+ }
+
/// Return empty set as the best state of potential values.
static KernelInfoState getBestState() { return KernelInfoState(true); }
@@ -584,12 +641,14 @@ struct KernelInfoState : AbstractState {
// Do not merge two different _init and _deinit call sites.
if (KIS.KernelInitCB) {
if (KernelInitCB && KernelInitCB != KIS.KernelInitCB)
- indicatePessimisticFixpoint();
+ llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
+ "assumptions.");
KernelInitCB = KIS.KernelInitCB;
}
if (KIS.KernelDeinitCB) {
if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
- indicatePessimisticFixpoint();
+ llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
+ "assumptions.");
KernelDeinitCB = KIS.KernelDeinitCB;
}
SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
@@ -1032,8 +1091,8 @@ private:
Args.clear();
Args.push_back(OutlinedFn->getArg(0));
Args.push_back(OutlinedFn->getArg(1));
- for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
- U < E; ++U)
+ for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
+ ++U)
Args.push_back(CI->getArgOperand(U));
CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI);
@@ -1041,9 +1100,9 @@ private:
NewCI->setDebugLoc(CI->getDebugLoc());
// Forward parameter attributes from the callback to the callee.
- for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
- U < E; ++U)
- for (const Attribute &A : CI->getAttributes().getParamAttributes(U))
+ for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
+ ++U)
+ for (const Attribute &A : CI->getAttributes().getParamAttrs(U))
NewCI->addParamAttr(
U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);
@@ -1563,13 +1622,13 @@ private:
// TODO: Use dominance to find a good position instead.
auto CanBeMoved = [this](CallBase &CB) {
- unsigned NumArgs = CB.getNumArgOperands();
+ unsigned NumArgs = CB.arg_size();
if (NumArgs == 0)
return true;
if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
return false;
- for (unsigned u = 1; u < NumArgs; ++u)
- if (isa<Instruction>(CB.getArgOperand(u)))
+ for (unsigned U = 1; U < NumArgs; ++U)
+ if (isa<Instruction>(CB.getArgOperand(U)))
return false;
return true;
};
@@ -1612,7 +1671,7 @@ private:
// valid at the new location. For now we just pick a global one, either
// existing and used by one of the calls, or created from scratch.
if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
- if (CI->getNumArgOperands() > 0 &&
+ if (!CI->arg_empty() &&
CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
/* GlobalOnly */ true);
@@ -1695,8 +1754,8 @@ private:
// Transitively search for more arguments by looking at the users of the
// ones we know already. During the search the GTIdArgs vector is extended
// so we cannot cache the size nor can we use a range based for.
- for (unsigned u = 0; u < GTIdArgs.size(); ++u)
- AddUserArgs(*GTIdArgs[u]);
+ for (unsigned U = 0; U < GTIdArgs.size(); ++U)
+ AddUserArgs(*GTIdArgs[U]);
}
/// Kernel (=GPU) optimizations and utility functions
@@ -1822,6 +1881,10 @@ private:
OMPRTL___kmpc_kernel_end_parallel);
ExternalizationRAII BarrierSPMD(OMPInfoCache,
OMPRTL___kmpc_barrier_simple_spmd);
+ ExternalizationRAII BarrierGeneric(OMPInfoCache,
+ OMPRTL___kmpc_barrier_simple_generic);
+ ExternalizationRAII ThreadId(OMPInfoCache,
+ OMPRTL___kmpc_get_hardware_thread_id_in_block);
registerAAs(IsModulePass);
@@ -1918,6 +1981,10 @@ bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
if (!KernelParallelRFI)
return Changed;
+ // If we have disabled state machine changes, exit
+ if (DisableOpenMPOptStateMachineRewrite)
+ return Changed;
+
for (Function *F : SCC) {
// Check if the function is a use in a __kmpc_parallel_51 call at
@@ -2509,9 +2576,8 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
- // Check if the edge into the successor block compares the __kmpc_target_init
- // result with -1. If we are in non-SPMD-mode that signals only the main
- // thread will execute the edge.
+ // Check if the edge into the successor block contains a condition that only
+ // lets the main thread execute it.
auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) {
if (!Edge || !Edge->isConditional())
return false;
@@ -2526,16 +2592,27 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
if (!C)
return false;
- // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
+ // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
if (C->isAllOnesValue()) {
auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
if (!CB)
return false;
- const int InitIsSPMDArgNo = 1;
- auto *IsSPMDModeCI =
- dyn_cast<ConstantInt>(CB->getOperand(InitIsSPMDArgNo));
- return IsSPMDModeCI && IsSPMDModeCI->isZero();
+ const int InitModeArgNo = 1;
+ auto *ModeCI = dyn_cast<ConstantInt>(CB->getOperand(InitModeArgNo));
+ return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC);
+ }
+
+ if (C->isZero()) {
+ // Match: 0 == llvm.nvvm.read.ptx.sreg.tid.x()
+ if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
+ if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
+ return true;
+
+ // Match: 0 == llvm.amdgcn.workitem.id.x()
+ if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
+ if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
+ return true;
}
return false;
@@ -2544,15 +2621,14 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
// Merge all the predecessor states into the current basic block. A basic
// block is executed by a single thread if all of its predecessors are.
auto MergePredecessorStates = [&](BasicBlock *BB) {
- if (pred_begin(BB) == pred_end(BB))
+ if (pred_empty(BB))
return SingleThreadedBBs.contains(BB);
bool IsInitialThread = true;
- for (auto PredBB = pred_begin(BB), PredEndBB = pred_end(BB);
- PredBB != PredEndBB; ++PredBB) {
- if (!IsInitialThreadOnly(dyn_cast<BranchInst>((*PredBB)->getTerminator()),
+ for (BasicBlock *PredBB : predecessors(BB)) {
+ if (!IsInitialThreadOnly(dyn_cast<BranchInst>(PredBB->getTerminator()),
BB))
- IsInitialThread &= SingleThreadedBBs.contains(*PredBB);
+ IsInitialThread &= SingleThreadedBBs.contains(PredBB);
}
return IsInitialThread;
@@ -2684,9 +2760,8 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
ConstantInt *AllocSize = dyn_cast<ConstantInt>(CB->getArgOperand(0));
- LLVM_DEBUG(dbgs() << TAG << "Replace globalization call in "
- << CB->getCaller()->getName() << " with "
- << AllocSize->getZExtValue()
+ LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CB
+ << " with " << AllocSize->getZExtValue()
<< " bytes of shared memory\n");
// Create a new shared memory buffer of the same size as the allocation
@@ -2735,7 +2810,7 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
const auto &ED = A.getAAFor<AAExecutionDomain>(
*this, IRPosition::function(*F), DepClassTy::REQUIRED);
if (CallBase *CB = dyn_cast<CallBase>(U))
- if (!dyn_cast<ConstantInt>(CB->getArgOperand(0)) ||
+ if (!isa<ConstantInt>(CB->getArgOperand(0)) ||
!ED.isExecutedByInitialThreadOnly(*CB))
MallocCalls.erase(CB);
}
@@ -2770,9 +2845,17 @@ struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]"
: "") +
std::string(" #PRs: ") +
- std::to_string(ReachedKnownParallelRegions.size()) +
+ (ReachedKnownParallelRegions.isValidState()
+ ? std::to_string(ReachedKnownParallelRegions.size())
+ : "<invalid>") +
", #Unknown PRs: " +
- std::to_string(ReachedUnknownParallelRegions.size());
+ (ReachedUnknownParallelRegions.isValidState()
+ ? std::to_string(ReachedUnknownParallelRegions.size())
+ : "<invalid>") +
+ ", #Reaching Kernels: " +
+ (ReachingKernelEntries.isValidState()
+ ? std::to_string(ReachingKernelEntries.size())
+ : "<invalid>");
}
/// Create an abstract attribute view for the position \p IRP.
@@ -2798,6 +2881,12 @@ struct AAKernelInfoFunction : AAKernelInfo {
AAKernelInfoFunction(const IRPosition &IRP, Attributor &A)
: AAKernelInfo(IRP, A) {}
+ SmallPtrSet<Instruction *, 4> GuardedInstructions;
+
+ SmallPtrSetImpl<Instruction *> &getGuardedInstructions() {
+ return GuardedInstructions;
+ }
+
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
// This is a high-level transform that might change the constant arguments
@@ -2844,8 +2933,11 @@ struct AAKernelInfoFunction : AAKernelInfo {
},
Fn);
- assert((KernelInitCB && KernelDeinitCB) &&
- "Kernel without __kmpc_target_init or __kmpc_target_deinit!");
+ // Ignore kernels without initializers such as global constructors.
+ if (!KernelInitCB || !KernelDeinitCB) {
+ indicateOptimisticFixpoint();
+ return;
+ }
// For kernels we might need to initialize/finalize the IsSPMD state and
// we need to register a simplification callback so that the Attributor
@@ -2860,7 +2952,10 @@ struct AAKernelInfoFunction : AAKernelInfo {
// state. As long as we are not in an invalid state, we will create a
// custom state machine so the value should be a `i1 false`. If we are
// in an invalid state, we won't change the value that is in the IR.
- if (!isValidState())
+ if (!ReachedKnownParallelRegions.isValidState())
+ return nullptr;
+ // If we have disabled state machine rewrites, don't make a custom one.
+ if (DisableOpenMPOptStateMachineRewrite)
return nullptr;
if (AA)
A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
@@ -2870,7 +2965,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
return FalseVal;
};
- Attributor::SimplifictionCallbackTy IsSPMDModeSimplifyCB =
+ Attributor::SimplifictionCallbackTy ModeSimplifyCB =
[&](const IRPosition &IRP, const AbstractAttribute *AA,
bool &UsedAssumedInformation) -> Optional<Value *> {
// IRP represents the "SPMDCompatibilityTracker" argument of an
@@ -2886,8 +2981,10 @@ struct AAKernelInfoFunction : AAKernelInfo {
} else {
UsedAssumedInformation = false;
}
- auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(),
- SPMDCompatibilityTracker.isAssumed());
+ auto *Val = ConstantInt::getSigned(
+ IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()),
+ SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD
+ : OMP_TGT_EXEC_MODE_GENERIC);
return Val;
};
@@ -2912,8 +3009,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
return Val;
};
- constexpr const int InitIsSPMDArgNo = 1;
- constexpr const int DeinitIsSPMDArgNo = 1;
+ constexpr const int InitModeArgNo = 1;
+ constexpr const int DeinitModeArgNo = 1;
constexpr const int InitUseStateMachineArgNo = 2;
constexpr const int InitRequiresFullRuntimeArgNo = 3;
constexpr const int DeinitRequiresFullRuntimeArgNo = 2;
@@ -2921,11 +3018,11 @@ struct AAKernelInfoFunction : AAKernelInfo {
IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
StateMachineSimplifyCB);
A.registerSimplificationCallback(
- IRPosition::callsite_argument(*KernelInitCB, InitIsSPMDArgNo),
- IsSPMDModeSimplifyCB);
+ IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo),
+ ModeSimplifyCB);
A.registerSimplificationCallback(
- IRPosition::callsite_argument(*KernelDeinitCB, DeinitIsSPMDArgNo),
- IsSPMDModeSimplifyCB);
+ IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo),
+ ModeSimplifyCB);
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelInitCB,
InitRequiresFullRuntimeArgNo),
@@ -2936,10 +3033,25 @@ struct AAKernelInfoFunction : AAKernelInfo {
IsGenericModeSimplifyCB);
// Check if we know we are in SPMD-mode already.
- ConstantInt *IsSPMDArg =
- dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo));
- if (IsSPMDArg && !IsSPMDArg->isZero())
+ ConstantInt *ModeArg =
+ dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
+ if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
+ // This is a generic region but SPMDization is disabled so stop tracking.
+ else if (DisableOpenMPOptSPMDization)
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ }
+
+ /// Sanitize the string \p S such that it is a suitable global symbol name.
+ static std::string sanitizeForGlobalName(std::string S) {
+ std::replace_if(
+ S.begin(), S.end(),
+ [](const char C) {
+ return !((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
+ (C >= '0' && C <= '9') || C == '_');
+ },
+ '.');
+ return S;
}
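  // Usage illustration (input values hypothetical): every character outside
  // [A-Za-z0-9_] is replaced with '.', e.g.
  //   sanitizeForGlobalName("i.guarded output$") == "i.guarded.output."
  // so the result is always a plain, linker-friendly symbol name.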
/// Modify the IR based on the KernelInfoState as the fixpoint iteration is
@@ -2950,19 +3062,16 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (!KernelInitCB || !KernelDeinitCB)
return ChangeStatus::UNCHANGED;
- // Known SPMD-mode kernels need no manifest changes.
- if (SPMDCompatibilityTracker.isKnown())
- return ChangeStatus::UNCHANGED;
-
// If we can we change the execution mode to SPMD-mode otherwise we build a
// custom state machine.
- if (!changeToSPMDMode(A))
- buildCustomStateMachine(A);
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ if (!changeToSPMDMode(A, Changed))
+ return buildCustomStateMachine(A);
- return ChangeStatus::CHANGED;
+ return Changed;
}
- bool changeToSPMDMode(Attributor &A) {
+ bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
if (!SPMDCompatibilityTracker.isAssumed()) {
@@ -2994,38 +3103,259 @@ struct AAKernelInfoFunction : AAKernelInfo {
return false;
}
- // Adjust the global exec mode flag that tells the runtime what mode this
- // kernel is executed in.
+ // Check if the kernel is already in SPMD mode, if so, return success.
Function *Kernel = getAnchorScope();
GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
(Kernel->getName() + "_exec_mode").str());
assert(ExecMode && "Kernel without exec mode?");
- assert(ExecMode->getInitializer() &&
- ExecMode->getInitializer()->isOneValue() &&
- "Initially non-SPMD kernel has SPMD exec mode!");
+ assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!");
// Set the global exec mode flag to indicate SPMD-Generic mode.
- constexpr int SPMDGeneric = 2;
- if (!ExecMode->getInitializer()->isZeroValue())
- ExecMode->setInitializer(
- ConstantInt::get(ExecMode->getInitializer()->getType(), SPMDGeneric));
+ assert(isa<ConstantInt>(ExecMode->getInitializer()) &&
+ "ExecMode is not an integer!");
+ const int8_t ExecModeVal =
+ cast<ConstantInt>(ExecMode->getInitializer())->getSExtValue();
+ if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC)
+ return true;
+
+ // We will now unconditionally modify the IR, indicate a change.
+ Changed = ChangeStatus::CHANGED;
+
+ auto CreateGuardedRegion = [&](Instruction *RegionStartI,
+ Instruction *RegionEndI) {
+ LoopInfo *LI = nullptr;
+ DominatorTree *DT = nullptr;
+ MemorySSAUpdater *MSU = nullptr;
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+
+ BasicBlock *ParentBB = RegionStartI->getParent();
+ Function *Fn = ParentBB->getParent();
+ Module &M = *Fn->getParent();
+
+ // Create all the blocks and logic.
+ // ParentBB:
+ // goto RegionCheckTidBB
+ // RegionCheckTidBB:
+ // Tid = __kmpc_hardware_thread_id()
+ // if (Tid != 0)
+ // goto RegionBarrierBB
+ // RegionStartBB:
+ // <execute instructions guarded>
+ // goto RegionEndBB
+ // RegionEndBB:
+ // <store escaping values to shared mem>
+ // goto RegionBarrierBB
+    //   RegionBarrierBB:
+    //     __kmpc_barrier_simple_spmd()
+    //     // second barrier is omitted if lacking escaping values.
+    //     <load escaping values from shared mem>
+    //     __kmpc_barrier_simple_spmd()
+    //     goto RegionExitBB
+ // RegionExitBB:
+ // <execute rest of instructions>
+
+ BasicBlock *RegionEndBB = SplitBlock(ParentBB, RegionEndI->getNextNode(),
+ DT, LI, MSU, "region.guarded.end");
+ BasicBlock *RegionBarrierBB =
+ SplitBlock(RegionEndBB, &*RegionEndBB->getFirstInsertionPt(), DT, LI,
+ MSU, "region.barrier");
+ BasicBlock *RegionExitBB =
+ SplitBlock(RegionBarrierBB, &*RegionBarrierBB->getFirstInsertionPt(),
+ DT, LI, MSU, "region.exit");
+ BasicBlock *RegionStartBB =
+ SplitBlock(ParentBB, RegionStartI, DT, LI, MSU, "region.guarded");
+
+ assert(ParentBB->getUniqueSuccessor() == RegionStartBB &&
+ "Expected a different CFG");
+
+ BasicBlock *RegionCheckTidBB = SplitBlock(
+ ParentBB, ParentBB->getTerminator(), DT, LI, MSU, "region.check.tid");
+
+ // Register basic blocks with the Attributor.
+ A.registerManifestAddedBasicBlock(*RegionEndBB);
+ A.registerManifestAddedBasicBlock(*RegionBarrierBB);
+ A.registerManifestAddedBasicBlock(*RegionExitBB);
+ A.registerManifestAddedBasicBlock(*RegionStartBB);
+ A.registerManifestAddedBasicBlock(*RegionCheckTidBB);
+
+ bool HasBroadcastValues = false;
+ // Find escaping outputs from the guarded region to outside users and
+ // broadcast their values to them.
+ for (Instruction &I : *RegionStartBB) {
+ SmallPtrSet<Instruction *, 4> OutsideUsers;
+ for (User *Usr : I.users()) {
+ Instruction &UsrI = *cast<Instruction>(Usr);
+ if (UsrI.getParent() != RegionStartBB)
+ OutsideUsers.insert(&UsrI);
+ }
+
+ if (OutsideUsers.empty())
+ continue;
+
+ HasBroadcastValues = true;
+
+ // Emit a global variable in shared memory to store the broadcasted
+ // value.
+ auto *SharedMem = new GlobalVariable(
+ M, I.getType(), /* IsConstant */ false,
+ GlobalValue::InternalLinkage, UndefValue::get(I.getType()),
+ sanitizeForGlobalName(
+ (I.getName() + ".guarded.output.alloc").str()),
+ nullptr, GlobalValue::NotThreadLocal,
+ static_cast<unsigned>(AddressSpace::Shared));
+
+ // Emit a store instruction to update the value.
+ new StoreInst(&I, SharedMem, RegionEndBB->getTerminator());
+
+ LoadInst *LoadI = new LoadInst(I.getType(), SharedMem,
+ I.getName() + ".guarded.output.load",
+ RegionBarrierBB->getTerminator());
+
+ // Emit a load instruction and replace uses of the output value.
+ for (Instruction *UsrI : OutsideUsers)
+ UsrI->replaceUsesOfWith(&I, LoadI);
+ }
+
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+
+ // Go to tid check BB in ParentBB.
+ const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
+ ParentBB->getTerminator()->eraseFromParent();
+ OpenMPIRBuilder::LocationDescription Loc(
+ InsertPointTy(ParentBB, ParentBB->end()), DL);
+ OMPInfoCache.OMPBuilder.updateToLocation(Loc);
+ auto *SrcLocStr = OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc);
+ Value *Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr);
+ BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL);
+
+ // Add check for Tid in RegionCheckTidBB
+ RegionCheckTidBB->getTerminator()->eraseFromParent();
+ OpenMPIRBuilder::LocationDescription LocRegionCheckTid(
+ InsertPointTy(RegionCheckTidBB, RegionCheckTidBB->end()), DL);
+ OMPInfoCache.OMPBuilder.updateToLocation(LocRegionCheckTid);
+ FunctionCallee HardwareTidFn =
+ OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
+ Value *Tid =
+ OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {});
+ Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid);
+ OMPInfoCache.OMPBuilder.Builder
+ .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB)
+ ->setDebugLoc(DL);
+
+ // First barrier for synchronization, ensures main thread has updated
+ // values.
+ FunctionCallee BarrierFn =
+ OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_barrier_simple_spmd);
+ OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy(
+ RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt()));
+ OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid})
+ ->setDebugLoc(DL);
+
+ // Second barrier ensures workers have read broadcast values.
+ if (HasBroadcastValues)
+ CallInst::Create(BarrierFn, {Ident, Tid}, "",
+ RegionBarrierBB->getTerminator())
+ ->setDebugLoc(DL);
+ };
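
At the source level, the guarding scheme amounts to: the main thread executes the region and publishes escaping values, then all threads synchronize and read them; the store, load, and second barrier are emitted only when values escape. A hedged C-like sketch of the emitted pattern (runtime call names taken from the code above, everything else illustrative):

    if (__kmpc_get_hardware_thread_id_in_block() == 0) {
      out = compute();     // guarded side-effecting work, main thread only
      shared_slot = out;   // broadcast the escaping value via shared memory
    }
    __kmpc_barrier_simple_spmd(ident, tid); // writes are visible to workers
    out = shared_slot;                      // every thread reads the broadcast
    __kmpc_barrier_simple_spmd(ident, tid); // reads done before slot reuse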
+
+ auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
+ SmallPtrSet<BasicBlock *, 8> Visited;
+ for (Instruction *GuardedI : SPMDCompatibilityTracker) {
+ BasicBlock *BB = GuardedI->getParent();
+ if (!Visited.insert(BB).second)
+ continue;
+
+ SmallVector<std::pair<Instruction *, Instruction *>> Reorders;
+ Instruction *LastEffect = nullptr;
+ BasicBlock::reverse_iterator IP = BB->rbegin(), IPEnd = BB->rend();
+ while (++IP != IPEnd) {
+ if (!IP->mayHaveSideEffects() && !IP->mayReadFromMemory())
+ continue;
+ Instruction *I = &*IP;
+ if (OpenMPOpt::getCallIfRegularCall(*I, &AllocSharedRFI))
+ continue;
+ if (!I->user_empty() || !SPMDCompatibilityTracker.contains(I)) {
+ LastEffect = nullptr;
+ continue;
+ }
+ if (LastEffect)
+ Reorders.push_back({I, LastEffect});
+ LastEffect = &*IP;
+ }
+ for (auto &Reorder : Reorders)
+ Reorder.first->moveBefore(Reorder.second);
+ }
+
+ SmallVector<std::pair<Instruction *, Instruction *>, 4> GuardedRegions;
+
+ for (Instruction *GuardedI : SPMDCompatibilityTracker) {
+ BasicBlock *BB = GuardedI->getParent();
+ auto *CalleeAA = A.lookupAAFor<AAKernelInfo>(
+ IRPosition::function(*GuardedI->getFunction()), nullptr,
+ DepClassTy::NONE);
+ assert(CalleeAA != nullptr && "Expected Callee AAKernelInfo");
+ auto &CalleeAAFunction = *cast<AAKernelInfoFunction>(CalleeAA);
+ // Continue if instruction is already guarded.
+ if (CalleeAAFunction.getGuardedInstructions().contains(GuardedI))
+ continue;
+
+ Instruction *GuardedRegionStart = nullptr, *GuardedRegionEnd = nullptr;
+ for (Instruction &I : *BB) {
+        // If instruction I needs to be guarded, update the guarded region
+ // bounds.
+ if (SPMDCompatibilityTracker.contains(&I)) {
+ CalleeAAFunction.getGuardedInstructions().insert(&I);
+ if (GuardedRegionStart)
+ GuardedRegionEnd = &I;
+ else
+ GuardedRegionStart = GuardedRegionEnd = &I;
+
+ continue;
+ }
+
+        // Instruction I does not need guarding; store any region found and
+        // reset the bounds.
+ if (GuardedRegionStart) {
+ GuardedRegions.push_back(
+ std::make_pair(GuardedRegionStart, GuardedRegionEnd));
+ GuardedRegionStart = nullptr;
+ GuardedRegionEnd = nullptr;
+ }
+ }
+ }
+
+ for (auto &GR : GuardedRegions)
+ CreateGuardedRegion(GR.first, GR.second);
+
+ // Adjust the global exec mode flag that tells the runtime what mode this
+ // kernel is executed in.
+ assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC &&
+ "Initially non-SPMD kernel has SPMD exec mode!");
+ ExecMode->setInitializer(
+ ConstantInt::get(ExecMode->getInitializer()->getType(),
+ ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
// Next rewrite the init and deinit calls to indicate we use SPMD-mode now.
- const int InitIsSPMDArgNo = 1;
- const int DeinitIsSPMDArgNo = 1;
+ const int InitModeArgNo = 1;
+ const int DeinitModeArgNo = 1;
const int InitUseStateMachineArgNo = 2;
const int InitRequiresFullRuntimeArgNo = 3;
const int DeinitRequiresFullRuntimeArgNo = 2;
auto &Ctx = getAnchorValue().getContext();
- A.changeUseAfterManifest(KernelInitCB->getArgOperandUse(InitIsSPMDArgNo),
- *ConstantInt::getBool(Ctx, 1));
+ A.changeUseAfterManifest(
+ KernelInitCB->getArgOperandUse(InitModeArgNo),
+ *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
+ OMP_TGT_EXEC_MODE_SPMD));
A.changeUseAfterManifest(
KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo),
*ConstantInt::getBool(Ctx, 0));
A.changeUseAfterManifest(
- KernelDeinitCB->getArgOperandUse(DeinitIsSPMDArgNo),
- *ConstantInt::getBool(Ctx, 1));
+ KernelDeinitCB->getArgOperandUse(DeinitModeArgNo),
+ *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
+ OMP_TGT_EXEC_MODE_SPMD));
A.changeUseAfterManifest(
KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo),
*ConstantInt::getBool(Ctx, 0));
@@ -3043,10 +3373,15 @@ struct AAKernelInfoFunction : AAKernelInfo {
};
ChangeStatus buildCustomStateMachine(Attributor &A) {
- assert(ReachedKnownParallelRegions.isValidState() &&
- "Custom state machine with invalid parallel region states?");
+ // If we have disabled state machine rewrites, don't make a custom one
+ if (DisableOpenMPOptStateMachineRewrite)
+ return ChangeStatus::UNCHANGED;
+
+ // Don't rewrite the state machine if we are not in a valid state.
+ if (!ReachedKnownParallelRegions.isValidState())
+ return ChangeStatus::UNCHANGED;
- const int InitIsSPMDArgNo = 1;
+ const int InitModeArgNo = 1;
const int InitUseStateMachineArgNo = 2;
// Check if the current configuration is non-SPMD and generic state machine.
@@ -3055,14 +3390,14 @@ struct AAKernelInfoFunction : AAKernelInfo {
// we give up.
ConstantInt *UseStateMachine = dyn_cast<ConstantInt>(
KernelInitCB->getArgOperand(InitUseStateMachineArgNo));
- ConstantInt *IsSPMD =
- dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo));
+ ConstantInt *Mode =
+ dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
// If we are stuck with generic mode, try to create a custom device (=GPU)
// state machine which is specialized for the parallel regions that are
// reachable by the kernel.
- if (!UseStateMachine || UseStateMachine->isZero() || !IsSPMD ||
- !IsSPMD->isZero())
+ if (!UseStateMachine || UseStateMachine->isZero() || !Mode ||
+ (Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
return ChangeStatus::UNCHANGED;
// If not SPMD mode, indicate we use a custom state machine now.
@@ -3075,8 +3410,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
// happen if there simply are no parallel regions. In the resulting kernel
// all worker threads will simply exit right away, leaving the main thread
// to do the work alone.
- if (ReachedKnownParallelRegions.empty() &&
- ReachedUnknownParallelRegions.empty()) {
+ if (!mayContainParallelRegion()) {
++NumOpenMPTargetRegionKernelsWithoutStateMachine;
auto Remark = [&](OptimizationRemark OR) {
@@ -3122,9 +3456,14 @@ struct AAKernelInfoFunction : AAKernelInfo {
// Create all the blocks:
//
// InitCB = __kmpc_target_init(...)
- // bool IsWorker = InitCB >= 0;
+ // BlockHwSize =
+ // __kmpc_get_hardware_num_threads_in_block();
+ // WarpSize = __kmpc_get_warp_size();
+ // BlockSize = BlockHwSize - WarpSize;
+ // if (InitCB >= BlockSize) return;
+ // IsWorkerCheckBB: bool IsWorker = InitCB >= 0;
// if (IsWorker) {
- // SMBeginBB: __kmpc_barrier_simple_spmd(...);
+ // SMBeginBB: __kmpc_barrier_simple_generic(...);
// void *WorkFn;
// bool Active = __kmpc_kernel_parallel(&WorkFn);
// if (!WorkFn) return;
@@ -3138,7 +3477,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
// ((WorkFnTy*)WorkFn)(...);
// SMEndParallelBB: __kmpc_kernel_end_parallel(...);
// }
- // SMDoneBB: __kmpc_barrier_simple_spmd(...);
+ // SMDoneBB: __kmpc_barrier_simple_generic(...);
// goto SMBeginBB;
// }
// UserCodeEntryBB: // user code
@@ -3150,6 +3489,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
BasicBlock *InitBB = KernelInitCB->getParent();
BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock(
KernelInitCB->getNextNode(), "thread.user_code.check");
+ BasicBlock *IsWorkerCheckBB =
+ BasicBlock::Create(Ctx, "is_worker_check", Kernel, UserCodeEntryBB);
BasicBlock *StateMachineBeginBB = BasicBlock::Create(
Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB);
BasicBlock *StateMachineFinishedBB = BasicBlock::Create(
@@ -3166,6 +3507,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB);
A.registerManifestAddedBasicBlock(*InitBB);
A.registerManifestAddedBasicBlock(*UserCodeEntryBB);
+ A.registerManifestAddedBasicBlock(*IsWorkerCheckBB);
A.registerManifestAddedBasicBlock(*StateMachineBeginBB);
A.registerManifestAddedBasicBlock(*StateMachineFinishedBB);
A.registerManifestAddedBasicBlock(*StateMachineIsActiveCheckBB);
@@ -3175,16 +3517,38 @@ struct AAKernelInfoFunction : AAKernelInfo {
const DebugLoc &DLoc = KernelInitCB->getDebugLoc();
ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);
-
InitBB->getTerminator()->eraseFromParent();
+
+ Module &M = *Kernel->getParent();
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+ FunctionCallee BlockHwSizeFn =
+ OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_get_hardware_num_threads_in_block);
+ FunctionCallee WarpSizeFn =
+ OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_get_warp_size);
+ Instruction *BlockHwSize =
+ CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB);
+ BlockHwSize->setDebugLoc(DLoc);
+ Instruction *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB);
+ WarpSize->setDebugLoc(DLoc);
+ Instruction *BlockSize =
+ BinaryOperator::CreateSub(BlockHwSize, WarpSize, "block.size", InitBB);
+ BlockSize->setDebugLoc(DLoc);
+ Instruction *IsMainOrWorker =
+ ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB,
+ BlockSize, "thread.is_main_or_worker", InitBB);
+ IsMainOrWorker->setDebugLoc(DLoc);
+ BranchInst::Create(IsWorkerCheckBB, StateMachineFinishedBB, IsMainOrWorker,
+ InitBB);
+
Instruction *IsWorker =
ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
ConstantInt::get(KernelInitCB->getType(), -1),
- "thread.is_worker", InitBB);
+ "thread.is_worker", IsWorkerCheckBB);
IsWorker->setDebugLoc(DLoc);
- BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, InitBB);
-
- Module &M = *Kernel->getParent();
+ BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker,
+ IsWorkerCheckBB);
// Create local storage for the work function pointer.
const DataLayout &DL = M.getDataLayout();
@@ -3194,7 +3558,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
"worker.work_fn.addr", &Kernel->getEntryBlock().front());
WorkFnAI->setDebugLoc(DLoc);
- auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
OMPInfoCache.OMPBuilder.updateToLocation(
OpenMPIRBuilder::LocationDescription(
IRBuilder<>::InsertPoint(StateMachineBeginBB,
@@ -3206,7 +3569,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
FunctionCallee BarrierFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
- M, OMPRTL___kmpc_barrier_simple_spmd);
+ M, OMPRTL___kmpc_barrier_simple_generic);
CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB)
->setDebugLoc(DLoc);
@@ -3258,8 +3621,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
// Now that we have most of the CFG skeleton it is time for the if-cascade
// that checks the function pointer we got from the runtime against the
// parallel regions we expect, if there are any.
- for (int i = 0, e = ReachedKnownParallelRegions.size(); i < e; ++i) {
- auto *ParallelRegion = ReachedKnownParallelRegions[i];
+ for (int I = 0, E = ReachedKnownParallelRegions.size(); I < E; ++I) {
+ auto *ParallelRegion = ReachedKnownParallelRegions[I];
BasicBlock *PRExecuteBB = BasicBlock::Create(
Ctx, "worker_state_machine.parallel_region.execute", Kernel,
StateMachineEndParallelBB);
@@ -3275,7 +3638,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
// Check if we need to compare the pointer at all or if we can just
// call the parallel region function.
Value *IsPR;
- if (i + 1 < e || !ReachedUnknownParallelRegions.empty()) {
+ if (I + 1 < E || !ReachedUnknownParallelRegions.empty()) {
Instruction *CmpI = ICmpInst::Create(
ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFnCast, ParallelRegion,
"worker.check_parallel_region", StateMachineIfCascadeCurrentBB);
@@ -3339,8 +3702,21 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (llvm::all_of(Objects,
[](const Value *Obj) { return isa<AllocaInst>(Obj); }))
return true;
+ // Check for AAHeapToStack moved objects which must not be guarded.
+ auto &HS = A.getAAFor<AAHeapToStack>(
+ *this, IRPosition::function(*I.getFunction()),
+ DepClassTy::OPTIONAL);
+ if (llvm::all_of(Objects, [&HS](const Value *Obj) {
+ auto *CB = dyn_cast<CallBase>(Obj);
+ if (!CB)
+ return false;
+ return HS.isAssumedHeapToStack(*CB);
+ })) {
+ return true;
+ }
}
- // For now we give up on everything but stores.
+
+      // Insert the instruction that needs guarding.
SPMDCompatibilityTracker.insert(&I);
return true;
};
@@ -3354,9 +3730,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (!IsKernelEntry) {
updateReachingKernelEntries(A);
updateParallelLevels(A);
+
+ if (!ParallelLevels.isValidState())
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
}
// Callback to check a call instruction.
+ bool AllParallelRegionStatesWereFixed = true;
bool AllSPMDStatesWereFixed = true;
auto CheckCallInst = [&](Instruction &I) {
auto &CB = cast<CallBase>(I);
@@ -3364,13 +3744,37 @@ struct AAKernelInfoFunction : AAKernelInfo {
*this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
getState() ^= CBAA.getState();
AllSPMDStatesWereFixed &= CBAA.SPMDCompatibilityTracker.isAtFixpoint();
+ AllParallelRegionStatesWereFixed &=
+ CBAA.ReachedKnownParallelRegions.isAtFixpoint();
+ AllParallelRegionStatesWereFixed &=
+ CBAA.ReachedUnknownParallelRegions.isAtFixpoint();
return true;
};
bool UsedAssumedInformationInCheckCallInst = false;
if (!A.checkForAllCallLikeInstructions(
- CheckCallInst, *this, UsedAssumedInformationInCheckCallInst))
+ CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) {
+ LLVM_DEBUG(dbgs() << TAG
+ << "Failed to visit all call-like instructions!\n";);
return indicatePessimisticFixpoint();
+ }
+
+ // If we haven't used any assumed information for the reached parallel
+ // region states we can fix it.
+ if (!UsedAssumedInformationInCheckCallInst &&
+ AllParallelRegionStatesWereFixed) {
+ ReachedKnownParallelRegions.indicateOptimisticFixpoint();
+ ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
+ }
+
+ // If we are sure there are no parallel regions in the kernel we do not
+ // want SPMD mode.
+ if (IsKernelEntry && ReachedUnknownParallelRegions.isAtFixpoint() &&
+ ReachedKnownParallelRegions.isAtFixpoint() &&
+ ReachedUnknownParallelRegions.isValidState() &&
+ ReachedKnownParallelRegions.isValidState() &&
+ !mayContainParallelRegion())
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
// If we haven't used any assumed information for the SPMD state we can fix
// it.
@@ -3469,14 +3873,14 @@ struct AAKernelInfoCallSite : AAKernelInfo {
CallBase &CB = cast<CallBase>(getAssociatedValue());
Function *Callee = getAssociatedFunction();
- // Helper to lookup an assumption string.
- auto HasAssumption = [](Function *Fn, StringRef AssumptionStr) {
- return Fn && hasAssumption(*Fn, AssumptionStr);
- };
+ auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>(
+ *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
// Check for SPMD-mode assumptions.
- if (HasAssumption(Callee, "ompx_spmd_amenable"))
+ if (AssumptionAA.hasAssumption("ompx_spmd_amenable")) {
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
+ indicateOptimisticFixpoint();
+ }
// First weed out calls we do not care about, that is readonly/readnone
// calls, intrinsics, and "no_openmp" calls. None of these can reach a
@@ -3498,14 +3902,16 @@ struct AAKernelInfoCallSite : AAKernelInfo {
// Unknown callees might contain parallel regions, except if they have
// an appropriate assumption attached.
- if (!(HasAssumption(Callee, "omp_no_openmp") ||
- HasAssumption(Callee, "omp_no_parallelism")))
+ if (!(AssumptionAA.hasAssumption("omp_no_openmp") ||
+ AssumptionAA.hasAssumption("omp_no_parallelism")))
ReachedUnknownParallelRegions.insert(&CB);
// If SPMDCompatibilityTracker is not fixed, we need to give up on the
// idea we can run something unknown in SPMD-mode.
- if (!SPMDCompatibilityTracker.isAtFixpoint())
+ if (!SPMDCompatibilityTracker.isAtFixpoint()) {
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.insert(&CB);
+ }
// We have updated the state for this unknown call properly; there won't
// be any change, so we indicate a fixpoint.
@@ -3521,6 +3927,7 @@ struct AAKernelInfoCallSite : AAKernelInfo {
switch (RF) {
// All the functions we know are compatible with SPMD mode.
case OMPRTL___kmpc_is_spmd_exec_mode:
+ case OMPRTL___kmpc_distribute_static_fini:
case OMPRTL___kmpc_for_static_fini:
case OMPRTL___kmpc_global_thread_num:
case OMPRTL___kmpc_get_hardware_num_threads_in_block:
@@ -3531,6 +3938,10 @@ struct AAKernelInfoCallSite : AAKernelInfo {
case OMPRTL___kmpc_end_master:
case OMPRTL___kmpc_barrier:
break;
+ case OMPRTL___kmpc_distribute_static_init_4:
+ case OMPRTL___kmpc_distribute_static_init_4u:
+ case OMPRTL___kmpc_distribute_static_init_8:
+ case OMPRTL___kmpc_distribute_static_init_8u:
case OMPRTL___kmpc_for_static_init_4:
case OMPRTL___kmpc_for_static_init_4u:
case OMPRTL___kmpc_for_static_init_8:
@@ -3548,6 +3959,7 @@ struct AAKernelInfoCallSite : AAKernelInfo {
case OMPScheduleType::DistributeChunked:
break;
default:
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.insert(&CB);
break;
};
@@ -3580,7 +3992,7 @@ struct AAKernelInfoCallSite : AAKernelInfo {
return;
default:
// Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
- // generally.
+ // generally. However, they do not hide parallel regions.
SPMDCompatibilityTracker.insert(&CB);
break;
}
@@ -3700,6 +4112,9 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
}
void initialize(Attributor &A) override {
+ if (DisableOpenMPOptFolding)
+ indicatePessimisticFixpoint();
+
Function *Callee = getAssociatedFunction();
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
@@ -3756,11 +4171,24 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
if (SimplifiedValue.hasValue() && SimplifiedValue.getValue()) {
- Instruction &CB = *getCtxI();
- A.changeValueAfterManifest(CB, **SimplifiedValue);
- A.deleteAfterManifest(CB);
+ Instruction &I = *getCtxI();
+ A.changeValueAfterManifest(I, **SimplifiedValue);
+ A.deleteAfterManifest(I);
- LLVM_DEBUG(dbgs() << TAG << "Folding runtime call: " << CB << " with "
+ CallBase *CB = dyn_cast<CallBase>(&I);
+ auto Remark = [&](OptimizationRemark OR) {
+ if (auto *C = dyn_cast<ConstantInt>(*SimplifiedValue))
+ return OR << "Replacing OpenMP runtime call "
+ << CB->getCalledFunction()->getName() << " with "
+ << ore::NV("FoldedValue", C->getZExtValue()) << ".";
+ return OR << "Replacing OpenMP runtime call "
+ << CB->getCalledFunction()->getName() << ".";
+ };
+
+ if (CB && EnableVerboseRemarks)
+ A.emitRemark<OptimizationRemark>(CB, "OMP180", Remark);
+
+ LLVM_DEBUG(dbgs() << TAG << "Replacing runtime call: " << I << " with "
<< **SimplifiedValue << "\n");
Changed = ChangeStatus::CHANGED;
@@ -3994,7 +4422,6 @@ void OpenMPOpt::registerAAs(bool IsModulePass) {
DepClassTy::NONE, /* ForceUpdate */ false,
/* UpdateAfterInit */ false);
-
registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id);
registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode);
registerFoldRuntimeCall(OMPRTL___kmpc_parallel_level);
@@ -4027,7 +4454,8 @@ void OpenMPOpt::registerAAs(bool IsModulePass) {
A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
return false;
};
- GlobalizationRFI.foreachUse(SCC, CreateAA);
+ if (!DisableOpenMPOptDeglobalization)
+ GlobalizationRFI.foreachUse(SCC, CreateAA);
// Create an ExecutionDomain AA for every function and a HeapToStack AA for
// every function if there is a device kernel.
@@ -4039,7 +4467,8 @@ void OpenMPOpt::registerAAs(bool IsModulePass) {
continue;
A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
- A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
+ if (!DisableOpenMPOptDeglobalization)
+ A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
for (auto &I : instructions(*F)) {
if (auto *LI = dyn_cast<LoadInst>(&I)) {
@@ -4234,12 +4663,24 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
SetVector<Function *> Functions(SCC.begin(), SCC.end());
OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);
- unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
+ unsigned MaxFixpointIterations =
+ (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false,
MaxFixpointIterations, OREGetter, DEBUG_TYPE);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(true);
+
+ // Optionally inline device functions for potentially better performance.
+ if (AlwaysInlineDeviceFunctions && isOpenMPDevice(M))
+ for (Function &F : M)
+ if (!F.isDeclaration() && !Kernels.contains(&F) &&
+ !F.hasFnAttribute(Attribute::NoInline))
+ F.addFnAttr(Attribute::AlwaysInline);
+
+ if (PrintModuleAfterOptimizations)
+ LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n" << M);
+
if (Changed)
return PreservedAnalyses::none();
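// [Editor's note] SetFixpointIterations, AlwaysInlineDeviceFunctions, and
// PrintModuleAfterOptimizations used above are cl::opt flags declared earlier
// in this file (outside the hunks shown). A minimal sketch of such a
// declaration, with a hypothetical flag string:
//   #include "llvm/Support/CommandLine.h"
//   static llvm::cl::opt<unsigned> FixpointIterationsSketch(
//       "openmp-opt-max-iterations-sketch", llvm::cl::Hidden,
//       llvm::cl::desc("Maximum number of attributor iterations (sketch)"),
//       llvm::cl::init(128));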
@@ -4286,12 +4727,17 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
/*CGSCC*/ Functions, Kernels);
- unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
+ unsigned MaxFixpointIterations =
+ (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
MaxFixpointIterations, OREGetter, DEBUG_TYPE);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(false);
+
+ if (PrintModuleAfterOptimizations)
+ LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M);
+
if (Changed)
return PreservedAnalyses::none();
@@ -4352,12 +4798,18 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
Allocator,
/*CGSCC*/ Functions, Kernels);
- unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
+ unsigned MaxFixpointIterations =
+ (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
MaxFixpointIterations, OREGetter, DEBUG_TYPE);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
- return OMPOpt.run(false);
+ bool Result = OMPOpt.run(false);
+
+ if (PrintModuleAfterOptimizations)
+ LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M);
+
+ return Result;
}
bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp
index d517de38ace3..7402e399a88a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -441,9 +441,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
};
auto BBProfileCount = [BFI](BasicBlock *BB) {
- return BFI->getBlockProfileCount(BB)
- ? BFI->getBlockProfileCount(BB).getValue()
- : 0;
+ return BFI->getBlockProfileCount(BB).getValueOr(0);
};
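// [Editor's note] A minimal sketch of the getValueOr contract used above,
// assuming llvm::Optional (the pre-std::optional LLVM type):
//   llvm::Optional<uint64_t> C = BFI->getBlockProfileCount(BB);
//   uint64_t N = C.getValueOr(0); // C.hasValue() ? C.getValue() : 0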
// Use the same computeBBInlineCost function to compute the cost savings of
@@ -1413,7 +1411,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
uint64_t CalleeEntryCountV =
- (CalleeEntryCount ? CalleeEntryCount.getCount() : 0);
+ (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
bool AnyInline = false;
for (User *User : Users) {
@@ -1461,8 +1459,8 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
if (AnyInline) {
Cloner.IsFunctionInlined = true;
if (CalleeEntryCount)
- Cloner.OrigFunc->setEntryCount(
- CalleeEntryCount.setCount(CalleeEntryCountV));
+ Cloner.OrigFunc->setEntryCount(Function::ProfileCount(
+ CalleeEntryCountV, CalleeEntryCount->getType()));
OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
OrigFuncORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index aa916345954d..74f68531b89a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -437,6 +437,11 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions
+ // The matrix extension can introduce large vector operations early, which can
+ // benefit from running vector-combine early on.
+ if (EnableMatrix)
+ MPM.add(createVectorCombinePass());
+
// Begin the loop pass pipeline.
if (EnableSimpleLoopUnswitch) {
// The simple loop unswitch pass relies on separate cleanup passes. Schedule
@@ -1012,7 +1017,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty()));
  // Propagate constant function arguments by specializing the functions.
- if (EnableFunctionSpecialization)
+ if (EnableFunctionSpecialization && OptLevel > 2)
PM.add(createFunctionSpecializationPass());
// Propagate constants at call sites into the functions they call. This
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SCCP.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SCCP.cpp
index 081398a390fa..5779553ee732 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -135,6 +135,7 @@ PreservedAnalyses FunctionSpecializationPass::run(Module &M,
return PA;
}
+namespace {
struct FunctionSpecializationLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
FunctionSpecializationLegacyPass() : ModulePass(ID) {}
@@ -175,6 +176,7 @@ struct FunctionSpecializationLegacyPass : public ModulePass {
return runFunctionSpecialization(M, DL, GetTLI, GetTTI, GetAC, GetAnalysis);
}
};
+} // namespace
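// [Editor's note] Wrapping the legacy pass in an anonymous namespace gives it
// internal linkage, so an identically named pass class in another translation
// unit cannot collide at link time (an ODR hygiene fix, not a behavior
// change). The general idiom:
//   namespace {
//   struct TULocalOnly {}; // visible only inside this translation unit
//   } // namespace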
char FunctionSpecializationLegacyPass::ID = 0;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 55b88ac14da5..bae9a1e27e75 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -32,7 +32,7 @@ ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite,
if (CalleeName.empty())
return getHottestChildContext(CallSite);
- uint32_t Hash = nodeHash(CalleeName, CallSite);
+ uint64_t Hash = nodeHash(CalleeName, CallSite);
auto It = AllChildContext.find(Hash);
if (It != AllChildContext.end())
return &It->second;
@@ -64,8 +64,8 @@ ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) {
ContextTrieNode &ContextTrieNode::moveToChildContext(
const LineLocation &CallSite, ContextTrieNode &&NodeToMove,
- StringRef ContextStrToRemove, bool DeleteNode) {
- uint32_t Hash = nodeHash(NodeToMove.getFuncName(), CallSite);
+ uint32_t ContextFramesToRemove, bool DeleteNode) {
+ uint64_t Hash = nodeHash(NodeToMove.getFuncName(), CallSite);
   assert(!AllChildContext.count(Hash) && "Node to move must not already exist");
LineLocation OldCallSite = NodeToMove.CallSiteLoc;
ContextTrieNode &OldParentContext = *NodeToMove.getParentContext();
@@ -86,10 +86,10 @@ ContextTrieNode &ContextTrieNode::moveToChildContext(
FunctionSamples *FSamples = Node->getFunctionSamples();
if (FSamples) {
- FSamples->getContext().promoteOnPath(ContextStrToRemove);
+ FSamples->getContext().promoteOnPath(ContextFramesToRemove);
FSamples->getContext().setState(SyntheticContext);
- LLVM_DEBUG(dbgs() << " Context promoted to: " << FSamples->getContext()
- << "\n");
+ LLVM_DEBUG(dbgs() << " Context promoted to: "
+ << FSamples->getContext().toString() << "\n");
}
for (auto &It : Node->getAllChildContext()) {
@@ -108,12 +108,12 @@ ContextTrieNode &ContextTrieNode::moveToChildContext(
void ContextTrieNode::removeChildContext(const LineLocation &CallSite,
StringRef CalleeName) {
- uint32_t Hash = nodeHash(CalleeName, CallSite);
+ uint64_t Hash = nodeHash(CalleeName, CallSite);
// Note this essentially calls dtor and destroys that child context
AllChildContext.erase(Hash);
}
-std::map<uint32_t, ContextTrieNode> &ContextTrieNode::getAllChildContext() {
+std::map<uint64_t, ContextTrieNode> &ContextTrieNode::getAllChildContext() {
return AllChildContext;
}
@@ -127,6 +127,15 @@ void ContextTrieNode::setFunctionSamples(FunctionSamples *FSamples) {
FuncSamples = FSamples;
}
+Optional<uint32_t> ContextTrieNode::getFunctionSize() const { return FuncSize; }
+
+void ContextTrieNode::addFunctionSize(uint32_t FSize) {
+ if (!FuncSize.hasValue())
+ FuncSize = 0;
+
+ FuncSize = FuncSize.getValue() + FSize;
+}
+
LineLocation ContextTrieNode::getCallSiteLoc() const { return CallSiteLoc; }
ContextTrieNode *ContextTrieNode::getParentContext() const {
@@ -137,9 +146,10 @@ void ContextTrieNode::setParentContext(ContextTrieNode *Parent) {
ParentContext = Parent;
}
-void ContextTrieNode::dump() {
+void ContextTrieNode::dumpNode() {
dbgs() << "Node: " << FuncName << "\n"
<< " Callsite: " << CallSiteLoc << "\n"
+ << " Size: " << FuncSize << "\n"
<< " Children:\n";
for (auto &It : AllChildContext) {
@@ -147,20 +157,38 @@ void ContextTrieNode::dump() {
}
}
-uint32_t ContextTrieNode::nodeHash(StringRef ChildName,
+void ContextTrieNode::dumpTree() {
+ dbgs() << "Context Profile Tree:\n";
+ std::queue<ContextTrieNode *> NodeQueue;
+ NodeQueue.push(this);
+
+ while (!NodeQueue.empty()) {
+ ContextTrieNode *Node = NodeQueue.front();
+ NodeQueue.pop();
+ Node->dumpNode();
+
+ for (auto &It : Node->getAllChildContext()) {
+ ContextTrieNode *ChildNode = &It.second;
+ NodeQueue.push(ChildNode);
+ }
+ }
+}
+
+uint64_t ContextTrieNode::nodeHash(StringRef ChildName,
const LineLocation &Callsite) {
   // We still use the child's name for the child hash because children of
   // the root node do not have distinct line/discriminator info, and we
   // rely on the name to differentiate them.
- uint32_t NameHash = std::hash<std::string>{}(ChildName.str());
- uint32_t LocId = (Callsite.LineOffset << 16) | Callsite.Discriminator;
+ uint64_t NameHash = std::hash<std::string>{}(ChildName.str());
+ uint64_t LocId =
+ (((uint64_t)Callsite.LineOffset) << 32) | Callsite.Discriminator;
return NameHash + (LocId << 5) + LocId;
}
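// [Editor's note] Worked example of the widened hash: a callsite with
// LineOffset=3, Discriminator=5 now packs to
//   LocId = ((uint64_t)3 << 32) | 5 == 0x0000000300000005
// whereas the old 32-bit form ((3 << 16) | 5 == 0x00030005) could collide
// once a line offset or discriminator exceeded its 16-bit field.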
ContextTrieNode *ContextTrieNode::getOrCreateChildContext(
const LineLocation &CallSite, StringRef CalleeName, bool AllowCreate) {
- uint32_t Hash = nodeHash(CalleeName, CallSite);
+ uint64_t Hash = nodeHash(CalleeName, CallSite);
auto It = AllChildContext.find(Hash);
if (It != AllChildContext.end()) {
assert(It->second.getFuncName() == CalleeName &&
@@ -177,13 +205,16 @@ ContextTrieNode *ContextTrieNode::getOrCreateChildContext(
 // Profile tracker that manages profiles and their associated contexts
SampleContextTracker::SampleContextTracker(
- StringMap<FunctionSamples> &Profiles) {
+ SampleProfileMap &Profiles,
+ const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap)
+ : GUIDToFuncNameMap(GUIDToFuncNameMap) {
for (auto &FuncSample : Profiles) {
FunctionSamples *FSamples = &FuncSample.second;
- SampleContext Context(FuncSample.first(), RawContext);
- LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context << "\n");
+ SampleContext Context = FuncSample.first;
+ LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context.toString()
+ << "\n");
if (!Context.isBaseContext())
- FuncToCtxtProfiles[Context.getNameWithoutContext()].push_back(FSamples);
+ FuncToCtxtProfiles[Context.getName()].insert(FSamples);
ContextTrieNode *NewNode = getOrCreateContextPath(Context, true);
assert(!NewNode->getFunctionSamples() &&
"New node can't have sample profile");
@@ -200,6 +231,10 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
return nullptr;
CalleeName = FunctionSamples::getCanonicalFnName(CalleeName);
+ // Convert real function names to MD5 names, if the input profile is
+ // MD5-based.
+ std::string FGUID;
+ CalleeName = getRepInFormat(CalleeName, FunctionSamples::UseMD5, FGUID);
// For indirect call, CalleeName will be empty, in which case the context
// profile for callee with largest total samples will be returned.
@@ -207,7 +242,8 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
if (CalleeContext) {
FunctionSamples *FSamples = CalleeContext->getFunctionSamples();
LLVM_DEBUG(if (FSamples) {
- dbgs() << " Callee context found: " << FSamples->getContext() << "\n";
+ dbgs() << " Callee context found: " << FSamples->getContext().toString()
+ << "\n";
});
return FSamples;
}
@@ -285,6 +321,11 @@ FunctionSamples *SampleContextTracker::getBaseSamplesFor(const Function &Func,
FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name,
bool MergeContext) {
LLVM_DEBUG(dbgs() << "Getting base profile for function: " << Name << "\n");
+ // Convert real function names to MD5 names, if the input profile is
+ // MD5-based.
+ std::string FGUID;
+ Name = getRepInFormat(Name, FunctionSamples::UseMD5, FGUID);
+
// Base profile is top-level node (child of root node), so try to retrieve
// existing top-level node for given function first. If it exists, it could be
// that we've merged base profile before, or there's actually context-less
@@ -299,14 +340,14 @@ FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name,
// into base profile.
for (auto *CSamples : FuncToCtxtProfiles[Name]) {
SampleContext &Context = CSamples->getContext();
- ContextTrieNode *FromNode = getContextFor(Context);
- if (FromNode == Node)
- continue;
-
// Skip inlined context profile and also don't re-merge any context
if (Context.hasState(InlinedContext) || Context.hasState(MergedContext))
continue;
+ ContextTrieNode *FromNode = getContextFor(Context);
+ if (FromNode == Node)
+ continue;
+
ContextTrieNode &ToNode = promoteMergeContextSamplesTree(*FromNode);
assert((!Node || Node == &ToNode) && "Expect only one base profile");
Node = &ToNode;
@@ -324,7 +365,7 @@ void SampleContextTracker::markContextSamplesInlined(
const FunctionSamples *InlinedSamples) {
assert(InlinedSamples && "Expect non-null inlined samples");
LLVM_DEBUG(dbgs() << "Marking context profile as inlined: "
- << InlinedSamples->getContext() << "\n");
+ << InlinedSamples->getContext().toString() << "\n");
InlinedSamples->getContext().setState(InlinedContext);
}
@@ -376,30 +417,23 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples();
assert(FromSamples && "Shouldn't promote a context without profile");
LLVM_DEBUG(dbgs() << " Found context tree root to promote: "
- << FromSamples->getContext() << "\n");
+ << FromSamples->getContext().toString() << "\n");
assert(!FromSamples->getContext().hasState(InlinedContext) &&
"Shouldn't promote inlined context profile");
- StringRef ContextStrToRemove = FromSamples->getContext().getCallingContext();
+ uint32_t ContextFramesToRemove =
+ FromSamples->getContext().getContextFrames().size() - 1;
return promoteMergeContextSamplesTree(NodeToPromo, RootContext,
- ContextStrToRemove);
+ ContextFramesToRemove);
}
-void SampleContextTracker::dump() {
- dbgs() << "Context Profile Tree:\n";
- std::queue<ContextTrieNode *> NodeQueue;
- NodeQueue.push(&RootContext);
-
- while (!NodeQueue.empty()) {
- ContextTrieNode *Node = NodeQueue.front();
- NodeQueue.pop();
- Node->dump();
+void SampleContextTracker::dump() { RootContext.dumpTree(); }
- for (auto &It : Node->getAllChildContext()) {
- ContextTrieNode *ChildNode = &It.second;
- NodeQueue.push(ChildNode);
- }
- }
+StringRef SampleContextTracker::getFuncNameFor(ContextTrieNode *Node) const {
+ if (!FunctionSamples::UseMD5)
+ return Node->getFuncName();
+ assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first");
+ return GUIDToFuncNameMap->lookup(std::stoull(Node->getFuncName().data()));
}
ContextTrieNode *
@@ -444,11 +478,22 @@ ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) {
RootName = PrevDIL->getScope()->getSubprogram()->getName();
S.push_back(std::make_pair(LineLocation(0, 0), RootName));
+ // Convert real function names to MD5 names, if the input profile is
+ // MD5-based.
+ std::list<std::string> MD5Names;
+ if (FunctionSamples::UseMD5) {
+ for (auto &Location : S) {
+ MD5Names.emplace_back();
+ getRepInFormat(Location.second, FunctionSamples::UseMD5, MD5Names.back());
+ Location.second = MD5Names.back();
+ }
+ }
+
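// [Editor's note] The std::list<std::string> above is deliberate:
// getRepInFormat writes the MD5/GUID text into the caller-provided buffer and
// hands back a StringRef into it, and std::list nodes never move, so the
// StringRefs stored back into S stay valid for the lookup loop below. A
// std::vector<std::string> could reallocate and dangle them.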
ContextTrieNode *ContextNode = &RootContext;
int I = S.size();
while (--I >= 0 && ContextNode) {
LineLocation &CallSite = S[I].first;
- StringRef &CalleeName = S[I].second;
+ StringRef CalleeName = S[I].second;
ContextNode = ContextNode->getChildContext(CallSite, CalleeName);
}
@@ -462,27 +507,18 @@ ContextTrieNode *
SampleContextTracker::getOrCreateContextPath(const SampleContext &Context,
bool AllowCreate) {
ContextTrieNode *ContextNode = &RootContext;
- StringRef ContextRemain = Context;
- StringRef ChildContext;
- StringRef CalleeName;
LineLocation CallSiteLoc(0, 0);
- while (ContextNode && !ContextRemain.empty()) {
- auto ContextSplit = SampleContext::splitContextString(ContextRemain);
- ChildContext = ContextSplit.first;
- ContextRemain = ContextSplit.second;
- LineLocation NextCallSiteLoc(0, 0);
- SampleContext::decodeContextString(ChildContext, CalleeName,
- NextCallSiteLoc);
-
+ for (auto &Callsite : Context.getContextFrames()) {
// Create child node at parent line/disc location
if (AllowCreate) {
ContextNode =
- ContextNode->getOrCreateChildContext(CallSiteLoc, CalleeName);
+ ContextNode->getOrCreateChildContext(CallSiteLoc, Callsite.FuncName);
} else {
- ContextNode = ContextNode->getChildContext(CallSiteLoc, CalleeName);
+ ContextNode =
+ ContextNode->getChildContext(CallSiteLoc, Callsite.FuncName);
}
- CallSiteLoc = NextCallSiteLoc;
+ CallSiteLoc = Callsite.Location;
}
assert((!AllowCreate || ContextNode) &&
@@ -502,7 +538,7 @@ ContextTrieNode &SampleContextTracker::addTopLevelContextNode(StringRef FName) {
void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode,
ContextTrieNode &ToNode,
- StringRef ContextStrToRemove) {
+ uint32_t ContextFramesToRemove) {
FunctionSamples *FromSamples = FromNode.getFunctionSamples();
FunctionSamples *ToSamples = ToNode.getFunctionSamples();
if (FromSamples && ToSamples) {
@@ -510,19 +546,21 @@ void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode,
ToSamples->merge(*FromSamples);
ToSamples->getContext().setState(SyntheticContext);
FromSamples->getContext().setState(MergedContext);
+ if (FromSamples->getContext().hasAttribute(ContextShouldBeInlined))
+ ToSamples->getContext().setAttribute(ContextShouldBeInlined);
} else if (FromSamples) {
// Transfer FromSamples from FromNode to ToNode
ToNode.setFunctionSamples(FromSamples);
FromSamples->getContext().setState(SyntheticContext);
- FromSamples->getContext().promoteOnPath(ContextStrToRemove);
+ FromSamples->getContext().promoteOnPath(ContextFramesToRemove);
FromNode.setFunctionSamples(nullptr);
}
}
ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent,
- StringRef ContextStrToRemove) {
- assert(!ContextStrToRemove.empty() && "Context to remove can't be empty");
+ uint32_t ContextFramesToRemove) {
+  assert(ContextFramesToRemove && "Number of frames to remove can't be zero");
// Ignore call site location if destination is top level under root
LineLocation NewCallSiteLoc = LineLocation(0, 0);
@@ -540,21 +578,21 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
// Do not delete node to move from its parent here because
// caller is iterating over children of that parent node.
ToNode = &ToNodeParent.moveToChildContext(
- NewCallSiteLoc, std::move(FromNode), ContextStrToRemove, false);
+ NewCallSiteLoc, std::move(FromNode), ContextFramesToRemove, false);
} else {
// Destination node exists, merge samples for the context tree
- mergeContextNode(FromNode, *ToNode, ContextStrToRemove);
+ mergeContextNode(FromNode, *ToNode, ContextFramesToRemove);
LLVM_DEBUG({
if (ToNode->getFunctionSamples())
dbgs() << " Context promoted and merged to: "
- << ToNode->getFunctionSamples()->getContext() << "\n";
+ << ToNode->getFunctionSamples()->getContext().toString() << "\n";
});
// Recursively promote and merge children
for (auto &It : FromNode.getAllChildContext()) {
ContextTrieNode &FromChildNode = It.second;
promoteMergeContextSamplesTree(FromChildNode, *ToNode,
- ContextStrToRemove);
+ ContextFramesToRemove);
}
// Remove children once they're all merged
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 8e9c79fc7bbb..a961c47a7501 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -143,6 +143,12 @@ static cl::opt<bool> ProfileSampleAccurate(
"callsite and function as having 0 samples. Otherwise, treat "
"un-sampled callsites and functions conservatively as unknown. "));
+static cl::opt<bool> ProfileSampleBlockAccurate(
+ "profile-sample-block-accurate", cl::Hidden, cl::init(false),
+ cl::desc("If the sample profile is accurate, we will mark all un-sampled "
+ "branches and calls as having 0 samples. Otherwise, treat "
+ "them conservatively as unknown. "));
+
static cl::opt<bool> ProfileAccurateForSymsInList(
"profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore,
cl::init(true),
@@ -214,6 +220,16 @@ static cl::opt<bool> CallsitePrioritizedInline(
cl::desc("Use call site prioritized inlining for sample profile loader."
"Currently only CSSPGO is supported."));
+static cl::opt<bool> UsePreInlinerDecision(
+ "sample-profile-use-preinliner", cl::Hidden, cl::ZeroOrMore,
+ cl::init(false),
+ cl::desc("Use the preinliner decisions stored in profile context."));
+
+static cl::opt<bool> AllowRecursiveInline(
+ "sample-profile-recursive-inline", cl::Hidden, cl::ZeroOrMore,
+ cl::init(false),
+ cl::desc("Allow sample loader inliner to inline recursive calls."));
+
static cl::opt<std::string> ProfileInlineReplayFile(
"sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
cl::desc(
@@ -221,6 +237,50 @@ static cl::opt<std::string> ProfileInlineReplayFile(
"by inlining from sample profile loader."),
cl::Hidden);
+static cl::opt<ReplayInlinerSettings::Scope> ProfileInlineReplayScope(
+ "sample-profile-inline-replay-scope",
+ cl::init(ReplayInlinerSettings::Scope::Function),
+ cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function",
+ "Replay on functions that have remarks associated "
+ "with them (default)"),
+ clEnumValN(ReplayInlinerSettings::Scope::Module, "Module",
+ "Replay on the entire module")),
+ cl::desc("Whether inline replay should be applied to the entire "
+ "Module or just the Functions (default) that are present as "
+ "callers in remarks during sample profile inlining."),
+ cl::Hidden);
+
+static cl::opt<ReplayInlinerSettings::Fallback> ProfileInlineReplayFallback(
+ "sample-profile-inline-replay-fallback",
+ cl::init(ReplayInlinerSettings::Fallback::Original),
+ cl::values(
+ clEnumValN(
+ ReplayInlinerSettings::Fallback::Original, "Original",
+ "All decisions not in replay send to original advisor (default)"),
+ clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
+ "AlwaysInline", "All decisions not in replay are inlined"),
+ clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline",
+ "All decisions not in replay are not inlined")),
+ cl::desc("How sample profile inline replay treats sites that don't come "
+ "from the replay. Original: defers to original advisor, "
+ "AlwaysInline: inline all sites not in replay, NeverInline: "
+ "inline no sites not in replay"),
+ cl::Hidden);
+
+static cl::opt<CallSiteFormat::Format> ProfileInlineReplayFormat(
+ "sample-profile-inline-replay-format",
+ cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
+ cl::values(
+ clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
+ clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn",
+ "<Line Number>:<Column Number>"),
+ clEnumValN(CallSiteFormat::Format::LineDiscriminator,
+ "LineDiscriminator", "<Line Number>.<Discriminator>"),
+ clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
+ "LineColumnDiscriminator",
+ "<Line Number>:<Column Number>.<Discriminator> (default)")),
+ cl::desc("How sample profile inline replay file is formatted"), cl::Hidden);
+
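// [Editor's note] Illustrative combination of the replay flags added above
// (hypothetical invocation; the file name is a placeholder):
//   -sample-profile-inline-replay=inline_remarks.txt
//   -sample-profile-inline-replay-scope=Module
//   -sample-profile-inline-replay-fallback=NeverInline
//   -sample-profile-inline-replay-format=LineColumn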
static cl::opt<unsigned>
MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
cl::ZeroOrMore,
@@ -358,10 +418,10 @@ public:
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
std::function<const TargetLibraryInfo &(Function &)> GetTLI)
- : SampleProfileLoaderBaseImpl(std::string(Name)),
+ : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)),
GetAC(std::move(GetAssumptionCache)),
GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
- RemappingFilename(std::string(RemapName)), LTOPhase(LTOPhase) {}
+ LTOPhase(LTOPhase) {}
bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
@@ -377,7 +437,7 @@ protected:
findFunctionSamples(const Instruction &I) const override;
std::vector<const FunctionSamples *>
findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
- void findExternalInlineCandidate(const FunctionSamples *Samples,
+ void findExternalInlineCandidate(CallBase *CB, const FunctionSamples *Samples,
DenseSet<GlobalValue::GUID> &InlinedGUIDs,
const StringMap<Function *> &SymbolMap,
uint64_t Threshold);
@@ -385,8 +445,11 @@ protected:
bool tryPromoteAndInlineCandidate(
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
+
bool inlineHotFunctions(Function &F,
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
+ Optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
+ bool getExternalInlineAdvisorShouldInline(CallBase &CB);
InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
bool
@@ -417,9 +480,6 @@ protected:
/// Profile tracker for different context.
std::unique_ptr<SampleContextTracker> ContextTracker;
- /// Name of the profile remapping file to load.
- std::string RemappingFilename;
-
/// Flag indicating whether input profile is context-sensitive
bool ProfileIsCS = false;
@@ -464,7 +524,7 @@ protected:
bool ProfAccForSymsInList;
// External inline advisor used to replay inline decision from remarks.
- std::unique_ptr<ReplayInlineAdvisor> ExternalInlineAdvisor;
+ std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
// A pseudo probe helper to correlate the imported sample counts.
std::unique_ptr<PseudoProbeManager> ProbeManager;
@@ -953,8 +1013,24 @@ void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
}
void SampleProfileLoader::findExternalInlineCandidate(
- const FunctionSamples *Samples, DenseSet<GlobalValue::GUID> &InlinedGUIDs,
+ CallBase *CB, const FunctionSamples *Samples,
+ DenseSet<GlobalValue::GUID> &InlinedGUIDs,
const StringMap<Function *> &SymbolMap, uint64_t Threshold) {
+
+  // If the ExternalInlineAdvisor wants to inline an external function,
+  // make sure it's imported.
+ if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
+    // Samples may not exist for a replayed function; if so,
+    // just add the direct GUID and move on.
+ if (!Samples) {
+ InlinedGUIDs.insert(
+ FunctionSamples::getGUID(CB->getCalledFunction()->getName()));
+ return;
+ }
+ // Otherwise, drop the threshold to import everything that we can
+ Threshold = 0;
+ }
+
assert(Samples && "expect non-null caller profile");
// For AutoFDO profile, retrieve candidate profiles by walking over
@@ -975,14 +1051,21 @@ void SampleProfileLoader::findExternalInlineCandidate(
// For CSSPGO profile, retrieve candidate profile by walking over the
   // trie built for context profile. Note that we also take call targets
   // even if the callee doesn't have a corresponding context profile.
- if (!CalleeSample || CalleeSample->getEntrySamples() < Threshold)
+ if (!CalleeSample)
+ continue;
+
+ // If pre-inliner decision is used, honor that for importing as well.
+ bool PreInline =
+ UsePreInlinerDecision &&
+ CalleeSample->getContext().hasAttribute(ContextShouldBeInlined);
+ if (!PreInline && CalleeSample->getEntrySamples() < Threshold)
continue;
StringRef Name = CalleeSample->getFuncName();
Function *Func = SymbolMap.lookup(Name);
// Add to the import list only when it's defined out of module.
if (!Func || Func->isDeclaration())
- InlinedGUIDs.insert(FunctionSamples::getGUID(Name));
+ InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeSample->getName()));
// Import hot CallTargets, which may not be available in IR because full
// profile annotation cannot be done until backend compilation in ThinLTO.
@@ -992,7 +1075,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
StringRef CalleeName = CalleeSample->getFuncName(TS.getKey());
const Function *Callee = SymbolMap.lookup(CalleeName);
if (!Callee || Callee->isDeclaration())
- InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeName));
+ InlinedGUIDs.insert(FunctionSamples::getGUID(TS.getKey()));
}
   // Import hot child context profiles associated with callees. Note that this
@@ -1042,16 +1125,20 @@ bool SampleProfileLoader::inlineHotFunctions(
for (auto &I : BB.getInstList()) {
const FunctionSamples *FS = nullptr;
if (auto *CB = dyn_cast<CallBase>(&I)) {
- if (!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(*CB))) {
- assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
- "GUIDToFuncNameMap has to be populated");
- AllCandidates.push_back(CB);
- if (FS->getEntrySamples() > 0 || ProfileIsCS)
- LocalNotInlinedCallSites.try_emplace(CB, FS);
- if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
- Hot = true;
- else if (shouldInlineColdCallee(*CB))
- ColdCandidates.push_back(CB);
+ if (!isa<IntrinsicInst>(I)) {
+ if ((FS = findCalleeFunctionSamples(*CB))) {
+ assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
+ "GUIDToFuncNameMap has to be populated");
+ AllCandidates.push_back(CB);
+ if (FS->getEntrySamples() > 0 || ProfileIsCS)
+ LocalNotInlinedCallSites.try_emplace(CB, FS);
+ if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
+ Hot = true;
+ else if (shouldInlineColdCallee(*CB))
+ ColdCandidates.push_back(CB);
+ } else if (getExternalInlineAdvisorShouldInline(*CB)) {
+ AllCandidates.push_back(CB);
+ }
}
}
}
@@ -1078,7 +1165,7 @@ bool SampleProfileLoader::inlineHotFunctions(
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
uint64_t SumOrigin = Sum;
if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
- findExternalInlineCandidate(FS, InlinedGUIDs, SymbolMap,
+ findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
PSI->getOrCompHotCountThreshold());
continue;
}
@@ -1098,8 +1185,8 @@ bool SampleProfileLoader::inlineHotFunctions(
LocalChanged = true;
}
} else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
- findExternalInlineCandidate(findCalleeFunctionSamples(*I), InlinedGUIDs,
- SymbolMap,
+ findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
+ InlinedGUIDs, SymbolMap,
PSI->getOrCompHotCountThreshold());
}
}
@@ -1184,8 +1271,8 @@ bool SampleProfileLoader::tryInlineCandidate(
*CalledFunction);
// The call to InlineFunction erases I, so we can't pass it here.
- emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
- true, CSINLINE_DEBUG);
+ emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction,
+ *BB->getParent(), Cost, true, CSINLINE_DEBUG);
// Now populate the list of newly exposed call sites.
if (InlinedCallSites) {
@@ -1228,7 +1315,9 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
// Find the callee's profile. For indirect call, find hottest target profile.
const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
- if (!CalleeSamples)
+ // If ExternalInlineAdvisor wants to inline this site, do so even
+ // if Samples are not present.
+ if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
return false;
float Factor = 1.0;
@@ -1247,19 +1336,34 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
return true;
}
-InlineCost
-SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
+Optional<InlineCost>
+SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
std::unique_ptr<InlineAdvice> Advice = nullptr;
if (ExternalInlineAdvisor) {
- Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr);
- if (!Advice->isInliningRecommended()) {
- Advice->recordUnattemptedInlining();
- return InlineCost::getNever("not previously inlined");
+ Advice = ExternalInlineAdvisor->getAdvice(CB);
+ if (Advice) {
+ if (!Advice->isInliningRecommended()) {
+ Advice->recordUnattemptedInlining();
+ return InlineCost::getNever("not previously inlined");
+ }
+ Advice->recordInlining();
+ return InlineCost::getAlways("previously inlined");
}
- Advice->recordInlining();
- return InlineCost::getAlways("previously inlined");
}
+ return {};
+}
+
+bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
+ Optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB);
+ return Cost ? !!Cost.getValue() : false;
+}
+
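// [Editor's note] InlineCost converts to bool ("is inlining viable"), which
// is what the `!!Cost.getValue()` above forces explicitly: getAlways(...) is
// truthy and getNever(...) is falsy, so the replay advisor's verdict
// collapses to a plain yes/no here.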
+InlineCost
+SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
+ if (Optional<InlineCost> ReplayCost =
+ getExternalInlineAdvisorCost(*Candidate.CallInstr))
+ return ReplayCost.getValue();
+  // Adjust the threshold based on call site hotness; only do this for the
+  // callsite-prioritized inliner, because otherwise the cost-benefit check
+  // is done earlier.
int SampleThreshold = SampleColdCallSiteThreshold;
@@ -1274,7 +1378,9 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
assert(Callee && "Expect a definition for inline candidate of direct call");
InlineParams Params = getInlineParams();
+ // We will ignore the threshold from inline cost, so always get full cost.
Params.ComputeFullInlineCost = true;
+ Params.AllowRecursiveCall = AllowRecursiveInline;
// Checks if there is anything in the reachable portion of the callee at
// this callsite that makes this inlining potentially illegal. Need to
// set ComputeFullInlineCost, otherwise getInlineCost may return early
@@ -1288,6 +1394,25 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
if (Cost.isNever() || Cost.isAlways())
return Cost;
+  // With CSSPGO, the preinliner in llvm-profgen can estimate global inline
+  // decisions based on hotness as well as accurate function byte sizes for
+  // a given context, using function/inlinee sizes from a previous build. It
+  // stores the decisions in the profile and also adjusts/merges context
+  // profiles, aiming at better context-sensitive post-inline profile
+  // quality, assuming all inline decision estimates will be honored by the
+  // compiler. Here we replay those inline decisions under
+  // `sample-profile-use-preinliner`. Note that we don't need to handle
+  // negative decisions from the preinliner, as context profiles for
+  // non-inlined calls are already merged by it.
+ if (UsePreInlinerDecision && Candidate.CalleeSamples) {
+    // Once two nodes are merged due to promotion, we lose some context,
+    // so the original context-sensitive preinliner decision should be
+    // ignored for SyntheticContext.
+ SampleContext &Context = Candidate.CalleeSamples->getContext();
+ if (!Context.hasState(SyntheticContext) &&
+ Context.hasAttribute(ContextShouldBeInlined))
+ return InlineCost::getAlways("preinliner");
+ }
+
// For old FDO inliner, we inline the call site as long as cost is not
// "Never". The cost-benefit check is done earlier.
if (!CallsitePrioritizedInline) {
@@ -1357,7 +1482,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
for (const auto *FS : CalleeSamples) {
       // TODO: Consider disabling pre-LTO ICP for MonoLTO as well
if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
- findExternalInlineCandidate(FS, InlinedGUIDs, SymbolMap,
+ findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
PSI->getOrCompHotCountThreshold());
continue;
}
@@ -1405,8 +1530,9 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
Changed = true;
}
} else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
- findExternalInlineCandidate(Candidate.CalleeSamples, InlinedGUIDs,
- SymbolMap, PSI->getOrCompHotCountThreshold());
+ findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
+ InlinedGUIDs, SymbolMap,
+ PSI->getOrCompHotCountThreshold());
}
}
@@ -1494,7 +1620,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
{static_cast<uint32_t>(BlockWeights[BB])}));
}
}
- } else if (OverwriteExistingWeights) {
+ } else if (OverwriteExistingWeights || ProfileSampleBlockAccurate) {
// Set profile metadata (possibly annotated by LTO prelink) to zero or
// clear it for cold code.
for (auto &I : BB->getInstList()) {
@@ -1792,11 +1918,13 @@ bool SampleProfileLoader::doInitialization(Module &M,
}
if (FAM && !ProfileInlineReplayFile.empty()) {
- ExternalInlineAdvisor = std::make_unique<ReplayInlineAdvisor>(
- M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr, ProfileInlineReplayFile,
+ ExternalInlineAdvisor = getReplayInlineAdvisor(
+ M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr,
+ ReplayInlinerSettings{ProfileInlineReplayFile,
+ ProfileInlineReplayScope,
+ ProfileInlineReplayFallback,
+ {ProfileInlineReplayFormat}},
/*EmitRemarks=*/false);
- if (!ExternalInlineAdvisor->areReplayRemarksLoaded())
- ExternalInlineAdvisor.reset();
}
// Apply tweaks if context-sensitive profile is available.
@@ -1810,13 +1938,21 @@ bool SampleProfileLoader::doInitialization(Module &M,
if (!CallsitePrioritizedInline.getNumOccurrences())
CallsitePrioritizedInline = true;
+ // For CSSPGO, use preinliner decision by default when available.
+ if (!UsePreInlinerDecision.getNumOccurrences())
+ UsePreInlinerDecision = true;
+
+  // For CSSPGO, we also allow recursive inlining to make the best use of
+  // context profiles.
+ if (!AllowRecursiveInline.getNumOccurrences())
+ AllowRecursiveInline = true;
+
// Enable iterative-BFI by default for CSSPGO.
if (!UseIterativeBFIInference.getNumOccurrences())
UseIterativeBFIInference = true;
// Tracker for profiles under different context
- ContextTracker =
- std::make_unique<SampleContextTracker>(Reader->getProfiles());
+ ContextTracker = std::make_unique<SampleContextTracker>(
+ Reader->getProfiles(), &GUIDToFuncNameMap);
}
// Load pseudo probe descriptors for probe-based function samples.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 08d316337ef5..21395460bccb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -415,9 +415,7 @@ void PseudoProbeUpdatePass::runOnFunction(Function &F,
FunctionAnalysisManager &FAM) {
BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
auto BBProfileCount = [&BFI](BasicBlock *BB) {
- return BFI.getBlockProfileCount(BB)
- ? BFI.getBlockProfileCount(BB).getValue()
- : 0;
+ return BFI.getBlockProfileCount(BB).getValueOr(0);
};
// Collect the sum of execution weight for each probe.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 655a7a404951..0f2412dce1c9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -30,23 +30,20 @@ static bool stripDeadPrototypes(Module &M) {
bool MadeChange = false;
// Erase dead function prototypes.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
- Function *F = &*I++;
+ for (Function &F : llvm::make_early_inc_range(M)) {
// Function must be a prototype and unused.
- if (F->isDeclaration() && F->use_empty()) {
- F->eraseFromParent();
+ if (F.isDeclaration() && F.use_empty()) {
+ F.eraseFromParent();
++NumDeadPrototypes;
MadeChange = true;
}
}
// Erase dead global var prototypes.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ) {
- GlobalVariable *GV = &*I++;
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
// Global must be a prototype and unused.
- if (GV->isDeclaration() && GV->use_empty())
- GV->eraseFromParent();
+ if (GV.isDeclaration() && GV.use_empty())
+ GV.eraseFromParent();
}
// Return an indication of whether we changed anything or not.
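// [Editor's note] llvm::make_early_inc_range, used above and in several hunks
// below, advances the underlying iterator before yielding each element, so
// the current element can be erased safely. A minimal self-contained sketch
// (helper name is hypothetical):
//   #include "llvm/ADT/STLExtras.h"
//   #include "llvm/IR/Module.h"
//   static void eraseUnusedDeclsSketch(llvm::Module &M) {
//     for (llvm::Function &F : llvm::make_early_inc_range(M))
//       if (F.isDeclaration() && F.use_empty())
//         F.eraseFromParent(); // safe: the range already stepped past F
//   }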
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 168740a1158e..9d4e9464f361 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -214,13 +214,13 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues);
for (GlobalVariable &GV : M.globals()) {
- if (GV.hasLocalLinkage() && llvmUsedValues.count(&GV) == 0)
+ if (GV.hasLocalLinkage() && !llvmUsedValues.contains(&GV))
if (!PreserveDbgInfo || !GV.getName().startswith("llvm.dbg"))
GV.setName(""); // Internal symbols can't participate in linkage
}
for (Function &I : M) {
- if (I.hasLocalLinkage() && llvmUsedValues.count(&I) == 0)
+ if (I.hasLocalLinkage() && !llvmUsedValues.contains(&I))
if (!PreserveDbgInfo || !I.getName().startswith("llvm.dbg"))
I.setName(""); // Internal symbols can't participate in linkage
if (auto *Symtab = I.getValueSymbolTable())
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index eea848d3eb2f..0cc1b37844f6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -164,8 +164,7 @@ void simplifyExternals(Module &M) {
FunctionType *EmptyFT =
FunctionType::get(Type::getVoidTy(M.getContext()), false);
- for (auto I = M.begin(), E = M.end(); I != E;) {
- Function &F = *I++;
+ for (Function &F : llvm::make_early_inc_range(M)) {
if (F.isDeclaration() && F.use_empty()) {
F.eraseFromParent();
continue;
@@ -181,16 +180,15 @@ void simplifyExternals(Module &M) {
F.getAddressSpace(), "", &M);
NewF->copyAttributesFrom(&F);
     // Only copy function attributes.
- NewF->setAttributes(
- AttributeList::get(M.getContext(), AttributeList::FunctionIndex,
- F.getAttributes().getFnAttributes()));
+ NewF->setAttributes(AttributeList::get(M.getContext(),
+ AttributeList::FunctionIndex,
+ F.getAttributes().getFnAttrs()));
NewF->takeName(&F);
F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
F.eraseFromParent();
}
- for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
- GlobalVariable &GV = *I++;
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
if (GV.isDeclaration() && GV.use_empty()) {
GV.eraseFromParent();
continue;
@@ -325,7 +323,8 @@ void splitAndWriteThinLTOBitcode(
return true;
if (auto *F = dyn_cast<Function>(GV))
return EligibleVirtualFns.count(F);
- if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
+ if (auto *GVar =
+ dyn_cast_or_null<GlobalVariable>(GV->getAliaseeObject()))
return HasTypeMetadata(GVar);
return false;
}));
@@ -354,7 +353,7 @@ void splitAndWriteThinLTOBitcode(
// Remove all globals with type metadata, globals with comdats that live in
// MergedM, and aliases pointing to such globals from the thin LTO module.
filterModule(&M, [&](const GlobalValue *GV) {
- if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
+ if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getAliaseeObject()))
if (HasTypeMetadata(GVar))
return false;
if (const auto *C = GV->getComdat())
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 7a8946110785..61054e7ae46f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -1288,7 +1288,7 @@ void DevirtModule::tryICallBranchFunnel(
M.getDataLayout().getProgramAddressSpace(),
"branch_funnel", &M);
}
- JT->addAttribute(1, Attribute::Nest);
+ JT->addParamAttr(0, Attribute::Nest);
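// [Editor's note] Equivalent attribute, new spelling: the legacy
// addAttribute() took an AttributeList index, where index 1 (FirstArgIndex)
// denotes the first parameter; addParamAttr() takes a 0-based parameter
// number instead, so both forms mark the first argument 'nest'.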
std::vector<Value *> JTArgs;
JTArgs.push_back(JT->arg_begin());
@@ -1361,10 +1361,10 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
M.getContext(), ArrayRef<Attribute>{Attribute::get(
M.getContext(), Attribute::Nest)}));
for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
- NewArgAttrs.push_back(Attrs.getParamAttributes(I));
+ NewArgAttrs.push_back(Attrs.getParamAttrs(I));
NewCS->setAttributes(
- AttributeList::get(M.getContext(), Attrs.getFnAttributes(),
- Attrs.getRetAttributes(), NewArgAttrs));
+ AttributeList::get(M.getContext(), Attrs.getFnAttrs(),
+ Attrs.getRetAttrs(), NewArgAttrs));
CB.replaceAllUsesWith(NewCS);
CB.eraseFromParent();
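// [Editor's note] The renamed accessors partition an AttributeList into its
// three components, which AttributeList::get() then reassembles:
//   function attrs  -> Attrs.getFnAttrs()
//   return attrs    -> Attrs.getRetAttrs()
//   per-param attrs -> Attrs.getParamAttrs(I)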
@@ -1786,10 +1786,8 @@ void DevirtModule::scanTypeTestUsers(
// points to a member of the type identifier %md. Group calls by (type ID,
// offset) pair (effectively the identity of the virtual function) and store
// to CallSlots.
- for (auto I = TypeTestFunc->use_begin(), E = TypeTestFunc->use_end();
- I != E;) {
- auto CI = dyn_cast<CallInst>(I->getUser());
- ++I;
+ for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
+ auto *CI = dyn_cast<CallInst>(U.getUser());
if (!CI)
continue;
@@ -1858,11 +1856,8 @@ void DevirtModule::scanTypeTestUsers(
void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
Function *TypeTestFunc = Intrinsic::getDeclaration(&M, Intrinsic::type_test);
- for (auto I = TypeCheckedLoadFunc->use_begin(),
- E = TypeCheckedLoadFunc->use_end();
- I != E;) {
- auto CI = dyn_cast<CallInst>(I->getUser());
- ++I;
+ for (Use &U : llvm::make_early_inc_range(TypeCheckedLoadFunc->uses())) {
+ auto *CI = dyn_cast<CallInst>(U.getUser());
if (!CI)
continue;