diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2021-11-19 20:06:13 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-11-19 20:06:13 +0000 |
commit | c0981da47d5696fe36474fcf86b4ce03ae3ff818 (patch) | |
tree | f42add1021b9f2ac6a69ac7cf6c4499962739a45 /llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp | |
parent | 344a3780b2e33f6ca763666c380202b18aab72a3 (diff) | |
download | src-c0981da47d5696fe36474fcf86b4ce03ae3ff818.tar.gz src-c0981da47d5696fe36474fcf86b4ce03ae3ff818.zip |
Vendor import of llvm-project main llvmorg-14-init-10186-gff7f2cfa959b (tag: vendor/llvm-project/llvmorg-14-init-10186-gff7f2cfa959b)
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp | 195 |
1 files changed, 195 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp new file mode 100644 index 000000000000..01d03d17ec47 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp @@ -0,0 +1,195 @@ +//===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file This pass recursively promotes generic pointer arguments of a kernel +/// into the global address space. +/// +/// The pass walks kernel's pointer arguments, then loads from them. If a loaded +/// value is a pointer and loaded pointer is unmodified in the kernel before the +/// load, then promote loaded pointer to global. Then recursively continue. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/InitializePasses.h" + +#define DEBUG_TYPE "amdgpu-promote-kernel-arguments" + +using namespace llvm; + +namespace { + +class AMDGPUPromoteKernelArguments : public FunctionPass { + MemorySSA *MSSA; + + Instruction *ArgCastInsertPt; + + SmallVector<Value *> Ptrs; + + void enqueueUsers(Value *Ptr); + + bool promotePointer(Value *Ptr); + +public: + static char ID; + + AMDGPUPromoteKernelArguments() : FunctionPass(ID) {} + + bool run(Function &F, MemorySSA &MSSA); + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MemorySSAWrapperPass>(); + AU.setPreservesAll(); + } +}; + +} // end anonymous namespace + +void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) { + SmallVector<User *> PtrUsers(Ptr->users()); + + while (!PtrUsers.empty()) { + Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val()); + if (!U) + continue; + + switch (U->getOpcode()) { + default: + break; + case Instruction::Load: { + LoadInst *LD = cast<LoadInst>(U); + PointerType *PT = dyn_cast<PointerType>(LD->getType()); + if (!PT || + (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS && + PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS && + PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) || + LD->getPointerOperand()->stripInBoundsOffsets() != Ptr) + break; + const MemoryAccess *MA = MSSA->getWalker()->getClobberingMemoryAccess(LD); + // TODO: This load poprobably can be promoted to constant address space. 
+ if (MSSA->isLiveOnEntryDef(MA)) + Ptrs.push_back(LD); + break; + } + case Instruction::GetElementPtr: + case Instruction::AddrSpaceCast: + case Instruction::BitCast: + if (U->getOperand(0)->stripInBoundsOffsets() == Ptr) + PtrUsers.append(U->user_begin(), U->user_end()); + break; + } + } +} + +bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) { + enqueueUsers(Ptr); + + PointerType *PT = cast<PointerType>(Ptr->getType()); + if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) + return false; + + bool IsArg = isa<Argument>(Ptr); + IRBuilder<> B(IsArg ? ArgCastInsertPt + : &*std::next(cast<Instruction>(Ptr)->getIterator())); + + // Cast pointer to global address space and back to flat and let + // Infer Address Spaces pass to do all necessary rewriting. + PointerType *NewPT = + PointerType::getWithSamePointeeType(PT, AMDGPUAS::GLOBAL_ADDRESS); + Value *Cast = + B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global")); + Value *CastBack = + B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat")); + Ptr->replaceUsesWithIf(CastBack, + [Cast](Use &U) { return U.getUser() != Cast; }); + + return true; +} + +// skip allocas +static BasicBlock::iterator getInsertPt(BasicBlock &BB) { + BasicBlock::iterator InsPt = BB.getFirstInsertionPt(); + for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) { + AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt); + + // If this is a dynamic alloca, the value may depend on the loaded kernargs, + // so loads will need to be inserted before it. 
+ if (!AI || !AI->isStaticAlloca()) + break; + } + + return InsPt; +} + +bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA) { + if (skipFunction(F)) + return false; + + CallingConv::ID CC = F.getCallingConv(); + if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty()) + return false; + + ArgCastInsertPt = &*getInsertPt(*F.begin()); + this->MSSA = &MSSA; + + for (Argument &Arg : F.args()) { + if (Arg.use_empty()) + continue; + + PointerType *PT = dyn_cast<PointerType>(Arg.getType()); + if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS && + PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS && + PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)) + continue; + + Ptrs.push_back(&Arg); + } + + bool Changed = false; + while (!Ptrs.empty()) { + Value *Ptr = Ptrs.pop_back_val(); + Changed |= promotePointer(Ptr); + } + + return Changed; +} + +bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) { + MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); + return run(F, MSSA); +} + +INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE, + "AMDGPU Promote Kernel Arguments", false, false) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE, + "AMDGPU Promote Kernel Arguments", false, false) + +char AMDGPUPromoteKernelArguments::ID = 0; + +FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() { + return new AMDGPUPromoteKernelArguments(); +} + +PreservedAnalyses +AMDGPUPromoteKernelArgumentsPass::run(Function &F, + FunctionAnalysisManager &AM) { + MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); + if (AMDGPUPromoteKernelArguments().run(F, MSSA)) { + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + PA.preserve<MemorySSAAnalysis>(); + return PA; + } + return PreservedAnalyses::all(); +} |