aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp')
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp83
1 files changed, 73 insertions, 10 deletions
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
index 8db319c73bb1..88934b65876e 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
@@ -15,15 +15,18 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "AMDGPUtti"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/CostTable.h"
+#include "llvm/Target/TargetLowering.h"
using namespace llvm;
+#define DEBUG_TYPE "AMDGPUtti"
+
// Declare the pass initialization routine locally as target-specific passes
// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
@@ -33,7 +36,7 @@ void initializeAMDGPUTTIPass(PassRegistry &);
namespace {
-class AMDGPUTTI : public ImmutablePass, public TargetTransformInfo {
+class AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo {
const AMDGPUTargetMachine *TM;
const AMDGPUSubtarget *ST;
const AMDGPUTargetLowering *TLI;
@@ -43,7 +46,7 @@ class AMDGPUTTI : public ImmutablePass, public TargetTransformInfo {
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
public:
- AMDGPUTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ AMDGPUTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
llvm_unreachable("This pass cannot be directly constructed");
}
@@ -53,11 +56,9 @@ public:
initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry());
}
- virtual void initializePass() { pushTTIStack(this); }
-
- virtual void finalizePass() { popTTIStack(); }
+ void initializePass() override { pushTTIStack(this); }
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
TargetTransformInfo::getAnalysisUsage(AU);
}
@@ -65,13 +66,22 @@ public:
static char ID;
/// Provide necessary pointer adjustments for the two base classes.
- virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ void *getAdjustedAnalysisPointer(const void *ID) override {
if (ID == &TargetTransformInfo::ID)
return (TargetTransformInfo *)this;
return this;
}
- virtual bool hasBranchDivergence() const;
+ bool hasBranchDivergence() const override;
+
+ void getUnrollingPreferences(Loop *L,
+ UnrollingPreferences &UP) const override;
+
+ PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const override;
+
+ unsigned getNumberOfRegisters(bool Vector) const override;
+ unsigned getRegisterBitWidth(bool Vector) const override;
+ unsigned getMaximumUnrollFactor() const override;
/// @}
};
@@ -88,3 +98,56 @@ llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) {
}
bool AMDGPUTTI::hasBranchDivergence() const { return true; }
+
+void AMDGPUTTI::getUnrollingPreferences(Loop *L,
+ UnrollingPreferences &UP) const {
+ for (const BasicBlock *BB : L->getBlocks()) {
+ for (const Instruction &I : *BB) {
+ const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
+ if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+ continue;
+
+ const Value *Ptr = GEP->getPointerOperand();
+ const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr));
+ if (Alloca) {
+ // We want to do whatever we can to limit the number of alloca
+ // instructions that make it through to the code generator. allocas
+ // require us to use indirect addressing, which is slow and prone to
+ // compiler bugs. If this loop does an address calculation on an
+ // alloca ptr, then we want to use a higher than normal loop unroll
+ // threshold. This will give SROA a better chance to eliminate these
+ // allocas.
+ //
+ // Don't use the maximum allowed value here as it will make some
+ // programs way too big.
+ UP.Threshold = 500;
+ }
+ }
+ }
+}
+
+AMDGPUTTI::PopcntSupportKind
+AMDGPUTTI::getPopcntSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ return ST->hasBCNT(TyWidth) ? PSK_FastHardware : PSK_Software;
+}
+
+unsigned AMDGPUTTI::getNumberOfRegisters(bool Vec) const {
+ if (Vec)
+ return 0;
+
+ // Number of VGPRs on SI.
+ if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ return 256;
+
+ return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
+}
+
+unsigned AMDGPUTTI::getRegisterBitWidth(bool) const {
+ return 32;
+}
+
+unsigned AMDGPUTTI::getMaximumUnrollFactor() const {
+ // Semi-arbitrary large amount.
+ return 64;
+}