aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp 27
1 files changed, 21 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index e64542a395f0..62ab5bb55a16 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -58,6 +58,21 @@ public:
} // end anonymous namespace
+// Skip leading static allocas in BB; returns the first other instruction, or BB.end() if none.
+static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
+ BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
+ for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
+
+ // Stop at the first non-alloca. Also stop at a dynamic alloca: its value may
+ // depend on the loaded kernargs, so the loads must be inserted before it.
+ if (!AI || !AI->isStaticAlloca())
+ break;
+ }
+
+ return InsPt;
+}
+
bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
CallingConv::ID CC = F.getCallingConv();
if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
@@ -70,7 +85,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
LLVMContext &Ctx = F.getParent()->getContext();
const DataLayout &DL = F.getParent()->getDataLayout();
BasicBlock &EntryBlock = *F.begin();
- IRBuilder<> Builder(&*EntryBlock.begin());
+ IRBuilder<> Builder(&*getInsertPt(EntryBlock));
const Align KernArgBaseAlign(16); // FIXME: Increase if necessary
const uint64_t BaseOffset = ST.getExplicitKernelArgOffset(F);
@@ -94,7 +109,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
for (Argument &Arg : F.args()) {
Type *ArgTy = Arg.getType();
- unsigned ABITypeAlign = DL.getABITypeAlignment(ArgTy);
+ Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
unsigned Size = DL.getTypeSizeInBits(ArgTy);
unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
@@ -120,7 +135,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
continue;
}
- VectorType *VT = dyn_cast<VectorType>(ArgTy);
+ auto *VT = dyn_cast<FixedVectorType>(ArgTy);
bool IsV3 = VT && VT->getNumElements() == 3;
bool DoShiftOpt = Size < 32 && !ArgTy->isAggregateType();
@@ -152,7 +167,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
}
if (IsV3 && Size >= 32) {
- V4Ty = VectorType::get(VT->getVectorElementType(), 4);
+ V4Ty = FixedVectorType::get(VT->getElementType(), 4);
// Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
AdjustedArgTy = V4Ty;
}
@@ -160,7 +175,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
ArgPtr = Builder.CreateBitCast(ArgPtr, AdjustedArgTy->getPointerTo(AS),
ArgPtr->getName() + ".cast");
LoadInst *Load =
- Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign.value());
+ Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));
MDBuilder MDB(Ctx);
@@ -210,7 +225,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
Arg.replaceAllUsesWith(NewVal);
} else if (IsV3) {
Value *Shuf = Builder.CreateShuffleVector(Load, UndefValue::get(V4Ty),
- {0, 1, 2},
+ ArrayRef<int>{0, 1, 2},
Arg.getName() + ".load");
Arg.replaceAllUsesWith(Shuf);
} else {