diff options
Diffstat (limited to 'contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp | 83 |
1 files changed, 73 insertions, 10 deletions
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp index 8db319c73bb1..88934b65876e 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp @@ -15,15 +15,18 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "AMDGPUtti" #include "AMDGPU.h" #include "AMDGPUTargetMachine.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/CostTable.h" +#include "llvm/Target/TargetLowering.h" using namespace llvm; +#define DEBUG_TYPE "AMDGPUtti" + // Declare the pass initialization routine locally as target-specific passes // don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. @@ -33,7 +36,7 @@ void initializeAMDGPUTTIPass(PassRegistry &); namespace { -class AMDGPUTTI : public ImmutablePass, public TargetTransformInfo { +class AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo { const AMDGPUTargetMachine *TM; const AMDGPUSubtarget *ST; const AMDGPUTargetLowering *TLI; @@ -43,7 +46,7 @@ class AMDGPUTTI : public ImmutablePass, public TargetTransformInfo { unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; public: - AMDGPUTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { + AMDGPUTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { llvm_unreachable("This pass cannot be directly constructed"); } @@ -53,11 +56,9 @@ public: initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry()); } - virtual void initializePass() { pushTTIStack(this); } - - virtual void finalizePass() { popTTIStack(); } + void initializePass() override { pushTTIStack(this); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { TargetTransformInfo::getAnalysisUsage(AU); } @@ -65,13 +66,22 @@ public: static char ID; /// Provide necessary pointer adjustments for the two base classes. - virtual void *getAdjustedAnalysisPointer(const void *ID) { + void *getAdjustedAnalysisPointer(const void *ID) override { if (ID == &TargetTransformInfo::ID) return (TargetTransformInfo *)this; return this; } - virtual bool hasBranchDivergence() const; + bool hasBranchDivergence() const override; + + void getUnrollingPreferences(Loop *L, + UnrollingPreferences &UP) const override; + + PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const override; + + unsigned getNumberOfRegisters(bool Vector) const override; + unsigned getRegisterBitWidth(bool Vector) const override; + unsigned getMaximumUnrollFactor() const override; /// @} }; @@ -88,3 +98,56 @@ llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) { } bool AMDGPUTTI::hasBranchDivergence() const { return true; } + +void AMDGPUTTI::getUnrollingPreferences(Loop *L, + UnrollingPreferences &UP) const { + for (const BasicBlock *BB : L->getBlocks()) { + for (const Instruction &I : *BB) { + const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I); + if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) + continue; + + const Value *Ptr = GEP->getPointerOperand(); + const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr)); + if (Alloca) { + // We want to do whatever we can to limit the number of alloca + // instructions that make it through to the code generator. allocas + // require us to use indirect addressing, which is slow and prone to + // compiler bugs. If this loop does an address calculation on an + // alloca ptr, then we want to use a higher than normal loop unroll + // threshold. This will give SROA a better chance to eliminate these + // allocas. + // + // Don't use the maximum allowed value here as it will make some + // programs way too big. + UP.Threshold = 500; + } + } + } +} + +AMDGPUTTI::PopcntSupportKind +AMDGPUTTI::getPopcntSupport(unsigned TyWidth) const { + assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); + return ST->hasBCNT(TyWidth) ? PSK_FastHardware : PSK_Software; +} + +unsigned AMDGPUTTI::getNumberOfRegisters(bool Vec) const { + if (Vec) + return 0; + + // Number of VGPRs on SI. + if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) + return 256; + + return 4 * 128; // XXX - 4 channels. Should these count as vector instead? +} + +unsigned AMDGPUTTI::getRegisterBitWidth(bool) const { + return 32; +} + +unsigned AMDGPUTTI::getMaximumUnrollFactor() const { + // Semi-arbitrary large amount. + return 64; +} |