aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2021-11-19 20:06:13 +0000
committerDimitry Andric <dim@FreeBSD.org>2021-11-19 20:06:13 +0000
commitc0981da47d5696fe36474fcf86b4ce03ae3ff818 (patch)
treef42add1021b9f2ac6a69ac7cf6c4499962739a45 /llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
parent344a3780b2e33f6ca763666c380202b18aab72a3 (diff)
downloadsrc-c0981da47d5696fe36474fcf86b4ce03ae3ff818.tar.gz
src-c0981da47d5696fe36474fcf86b4ce03ae3ff818.zip
Vendor import of llvm-project main llvmorg-14-init-10186-gff7f2cfa959b (tag: vendor/llvm-project/llvmorg-14-init-10186-gff7f2cfa959b)
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp195
1 files changed, 195 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
new file mode 100644
index 000000000000..01d03d17ec47
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
@@ -0,0 +1,195 @@
+//===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass recursively promotes generic pointer arguments of a kernel
+/// into the global address space.
+///
+/// The pass walks kernel's pointer arguments, then loads from them. If a loaded
+/// value is a pointer and loaded pointer is unmodified in the kernel before the
+/// load, then promote loaded pointer to global. Then recursively continue.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
+
+#define DEBUG_TYPE "amdgpu-promote-kernel-arguments"
+
+using namespace llvm;
+
+namespace {
+
class AMDGPUPromoteKernelArguments : public FunctionPass {
  // Memory SSA for the current function; used to prove that the memory a
  // pointer is loaded from is not clobbered between function entry and the
  // load.
  MemorySSA *MSSA;

  // Insertion point in the entry block (after the leading static allocas)
  // for the address space casts of kernel arguments.
  Instruction *ArgCastInsertPt;

  // Worklist of flat/global/constant pointers that are candidates for
  // promotion to the global address space.
  SmallVector<Value *> Ptrs;

  // Scan the users of \p Ptr and push onto the worklist every pointer
  // loaded from it whose clobbering access is live-on-entry.
  void enqueueUsers(Value *Ptr);

  // If \p Ptr is a flat pointer, cast it to global and back to flat and
  // reroute its uses through the round trip. Returns true if IR changed.
  bool promotePointer(Value *Ptr);

public:
  static char ID;

  AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}

  // Shared implementation used by both the legacy and the new pass manager
  // entry points.
  bool run(Function &F, MemorySSA &MSSA);

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MemorySSAWrapperPass>();
    AU.setPreservesAll();
  }
};
+
+} // end anonymous namespace
+
// Walk the transitive users of \p Ptr (through GEPs and casts) and push onto
// the worklist every pointer loaded from it that is provably unmodified
// between function entry and the load.
void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
  SmallVector<User *> PtrUsers(Ptr->users());

  while (!PtrUsers.empty()) {
    Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
    if (!U)
      continue;

    switch (U->getOpcode()) {
    default:
      break;
    case Instruction::Load: {
      // Only loads that produce a flat/global/constant pointer and that load
      // directly from Ptr (or an in-bounds offset of it) are interesting.
      LoadInst *LD = cast<LoadInst>(U);
      PointerType *PT = dyn_cast<PointerType>(LD->getType());
      if (!PT ||
          (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
           PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
           PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) ||
          LD->getPointerOperand()->stripInBoundsOffsets() != Ptr)
        break;
      // The loaded pointer is only safe to promote if nothing may have
      // written the loaded memory before the load, i.e. the load's
      // clobbering memory access is the live-on-entry definition.
      const MemoryAccess *MA = MSSA->getWalker()->getClobberingMemoryAccess(LD);
      // TODO: This load probably can be promoted to constant address space.
      if (MSSA->isLiveOnEntryDef(MA))
        Ptrs.push_back(LD);
      break;
    }
    case Instruction::GetElementPtr:
    case Instruction::AddrSpaceCast:
    case Instruction::BitCast:
      // Look through address arithmetic and pointer casts, but only when
      // they are based on Ptr itself (possibly via in-bounds offsets).
      if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
        PtrUsers.append(U->user_begin(), U->user_end());
      break;
    }
  }
}
+
+bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
+ enqueueUsers(Ptr);
+
+ PointerType *PT = cast<PointerType>(Ptr->getType());
+ if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
+ return false;
+
+ bool IsArg = isa<Argument>(Ptr);
+ IRBuilder<> B(IsArg ? ArgCastInsertPt
+ : &*std::next(cast<Instruction>(Ptr)->getIterator()));
+
+ // Cast pointer to global address space and back to flat and let
+ // Infer Address Spaces pass to do all necessary rewriting.
+ PointerType *NewPT =
+ PointerType::getWithSamePointeeType(PT, AMDGPUAS::GLOBAL_ADDRESS);
+ Value *Cast =
+ B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
+ Value *CastBack =
+ B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
+ Ptr->replaceUsesWithIf(CastBack,
+ [Cast](Use &U) { return U.getUser() != Cast; });
+
+ return true;
+}
+
+// skip allocas
+static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
+ BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
+ for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
+
+ // If this is a dynamic alloca, the value may depend on the loaded kernargs,
+ // so loads will need to be inserted before it.
+ if (!AI || !AI->isStaticAlloca())
+ break;
+ }
+
+ return InsPt;
+}
+
+bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA) {
+ if (skipFunction(F))
+ return false;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
+ return false;
+
+ ArgCastInsertPt = &*getInsertPt(*F.begin());
+ this->MSSA = &MSSA;
+
+ for (Argument &Arg : F.args()) {
+ if (Arg.use_empty())
+ continue;
+
+ PointerType *PT = dyn_cast<PointerType>(Arg.getType());
+ if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
+ PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
+ PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
+ continue;
+
+ Ptrs.push_back(&Arg);
+ }
+
+ bool Changed = false;
+ while (!Ptrs.empty()) {
+ Value *Ptr = Ptrs.pop_back_val();
+ Changed |= promotePointer(Ptr);
+ }
+
+ return Changed;
+}
+
+bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
+ MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ return run(F, MSSA);
+}
+
// Register the legacy pass with the pass registry, declaring its dependency
// on the MemorySSA wrapper pass.
INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                      "AMDGPU Promote Kernel Arguments", false, false)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                    "AMDGPU Promote Kernel Arguments", false, false)

// Pass identification; the address of ID is what uniquely identifies the
// pass, the value itself is unused.
char AMDGPUPromoteKernelArguments::ID = 0;
+
// Factory used by the AMDGPU target pass pipeline to create the legacy pass.
FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
  return new AMDGPUPromoteKernelArguments();
}
+
+PreservedAnalyses
+AMDGPUPromoteKernelArgumentsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+ if (AMDGPUPromoteKernelArguments().run(F, MSSA)) {
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
+ }
+ return PreservedAnalyses::all();
+}