diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2022-07-03 14:10:23 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2022-07-03 14:10:23 +0000 |
commit | 145449b1e420787bb99721a429341fa6be3adfb6 (patch) | |
tree | 1d56ae694a6de602e348dd80165cf881a36600ed /llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp | |
parent | ecbca9f5fb7d7613d2b94982c4825eb0d33d6842 (diff) | |
download | src-145449b1e420787bb99721a429341fa6be3adfb6.tar.gz src-145449b1e420787bb99721a429341fa6be3adfb6.zip |
Vendor import of llvm-project main llvmorg-15-init-15358-g53dc0f107877. (vendor/llvm-project/llvmorg-15-init-15358-g53dc0f107877)
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp | 64 |
1 files changed, 44 insertions, 20 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp index 01d03d17ec47..ed450f59e4b3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp @@ -16,7 +16,9 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "Utils/AMDGPUMemoryUtils.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/IR/IRBuilder.h" #include "llvm/InitializePasses.h" @@ -30,6 +32,8 @@ namespace { class AMDGPUPromoteKernelArguments : public FunctionPass { MemorySSA *MSSA; + AliasAnalysis *AA; + Instruction *ArgCastInsertPt; SmallVector<Value *> Ptrs; @@ -38,16 +42,19 @@ class AMDGPUPromoteKernelArguments : public FunctionPass { bool promotePointer(Value *Ptr); + bool promoteLoad(LoadInst *LI); + public: static char ID; AMDGPUPromoteKernelArguments() : FunctionPass(ID) {} - bool run(Function &F, MemorySSA &MSSA); + bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA); bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<MemorySSAWrapperPass>(); AU.setPreservesAll(); } @@ -68,17 +75,10 @@ void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) { break; case Instruction::Load: { LoadInst *LD = cast<LoadInst>(U); - PointerType *PT = dyn_cast<PointerType>(LD->getType()); - if (!PT || - (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS && - PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS && - PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) || - LD->getPointerOperand()->stripInBoundsOffsets() != Ptr) - break; - const MemoryAccess *MA = MSSA->getWalker()->getClobberingMemoryAccess(LD); - // TODO: This load poprobably can be promoted to constant address space. 
- if (MSSA->isLiveOnEntryDef(MA)) + if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr && + !AMDGPU::isClobberedInFunction(LD, MSSA, AA)) Ptrs.push_back(LD); + break; } case Instruction::GetElementPtr: @@ -92,15 +92,26 @@ void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) { } bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) { - enqueueUsers(Ptr); + bool Changed = false; + + LoadInst *LI = dyn_cast<LoadInst>(Ptr); + if (LI) + Changed |= promoteLoad(LI); + + PointerType *PT = dyn_cast<PointerType>(Ptr->getType()); + if (!PT) + return Changed; + + if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS || + PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || + PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) + enqueueUsers(Ptr); - PointerType *PT = cast<PointerType>(Ptr->getType()); if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) - return false; + return Changed; - bool IsArg = isa<Argument>(Ptr); - IRBuilder<> B(IsArg ? ArgCastInsertPt - : &*std::next(cast<Instruction>(Ptr)->getIterator())); + IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator()) + : ArgCastInsertPt); // Cast pointer to global address space and back to flat and let // Infer Address Spaces pass to do all necessary rewriting. 
@@ -116,6 +127,14 @@ bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) { return true; } +bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) { + if (!LI->isSimple()) + return false; + + LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {})); + return true; +} + // skip allocas static BasicBlock::iterator getInsertPt(BasicBlock &BB) { BasicBlock::iterator InsPt = BB.getFirstInsertionPt(); @@ -131,7 +150,8 @@ static BasicBlock::iterator getInsertPt(BasicBlock &BB) { return InsPt; } -bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA) { +bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA, + AliasAnalysis &AA) { if (skipFunction(F)) return false; @@ -141,6 +161,7 @@ bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA) { ArgCastInsertPt = &*getInsertPt(*F.begin()); this->MSSA = &MSSA; + this->AA = &AA; for (Argument &Arg : F.args()) { if (Arg.use_empty()) @@ -166,11 +187,13 @@ bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA) { bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) { MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); - return run(F, MSSA); + AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); + return run(F, MSSA, AA); } INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE, "AMDGPU Promote Kernel Arguments", false, false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE, "AMDGPU Promote Kernel Arguments", false, false) @@ -185,7 +208,8 @@ PreservedAnalyses AMDGPUPromoteKernelArgumentsPass::run(Function &F, FunctionAnalysisManager &AM) { MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); - if (AMDGPUPromoteKernelArguments().run(F, MSSA)) { + AliasAnalysis &AA = AM.getResult<AAManager>(F); + if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) { PreservedAnalyses PA; 
PA.preserveSet<CFGAnalyses>(); PA.preserve<MemorySSAAnalysis>(); |