aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/GCNSubtarget.h
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/GCNSubtarget.h')
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNSubtarget.h 61
1 file changed, 49 insertions, 12 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 2017ae84353c..ef5470df876d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -15,10 +15,12 @@
#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
#include "AMDGPUCallLowering.h"
+#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIFrameLowering.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#define GET_SUBTARGETINFO_HEADER
@@ -51,7 +53,7 @@ private:
std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
std::unique_ptr<InstructionSelector> InstSelector;
std::unique_ptr<LegalizerInfo> Legalizer;
- std::unique_ptr<RegisterBankInfo> RegBankInfo;
+ std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
protected:
// Basic subtarget description.
@@ -63,7 +65,6 @@ protected:
unsigned MaxPrivateElementSize = 0;
// Possibly statically set by tablegen, but may want to be overridden.
- bool FastFMAF32 = false;
bool FastDenormalF32 = false;
bool HalfRate64Ops = false;
bool FullRate64Ops = false;
@@ -132,7 +133,7 @@ protected:
bool HasA16 = false;
bool HasG16 = false;
bool HasNSAEncoding = false;
- unsigned NSAMaxSize = 0;
+ bool HasPartialNSAEncoding = false;
bool GFX10_AEncoding = false;
bool GFX10_BEncoding = false;
bool HasDLInsts = false;
@@ -146,12 +147,17 @@ protected:
bool HasDot7Insts = false;
bool HasDot8Insts = false;
bool HasDot9Insts = false;
+ bool HasDot10Insts = false;
bool HasMAIInsts = false;
bool HasFP8Insts = false;
bool HasPkFmacF16Inst = false;
+ bool HasAtomicDsPkAdd16Insts = false;
+ bool HasAtomicFlatPkAdd16Insts = false;
bool HasAtomicFaddRtnInsts = false;
bool HasAtomicFaddNoRtnInsts = false;
- bool HasAtomicPkFaddNoRtnInsts = false;
+ bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;
+ bool HasAtomicBufferGlobalPkAddF16Insts = false;
+ bool HasAtomicGlobalPkAddBF16Inst = false;
bool HasFlatAtomicFaddF32Inst = false;
bool SupportsSRAMECC = false;
@@ -173,6 +179,7 @@ protected:
bool ScalarFlatScratchInsts = false;
bool HasArchitectedFlatScratch = false;
bool EnableFlatScratch = false;
+ bool HasArchitectedSGPRs = false;
bool AddNoCarryInsts = false;
bool HasUnpackedD16VMem = false;
bool LDSMisalignedBug = false;
@@ -198,6 +205,7 @@ protected:
bool HasMADIntraFwdBug = false;
bool HasVOPDInsts = false;
bool HasVALUTransUseHazard = false;
+ bool HasForceStoreSC0SC1 = false;
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
@@ -248,7 +256,7 @@ public:
return Legalizer.get();
}
- const RegisterBankInfo *getRegBankInfo() const override {
+ const AMDGPURegisterBankInfo *getRegBankInfo() const override {
return RegBankInfo.get();
}
@@ -283,7 +291,7 @@ public:
/// Return the number of high bits known to be zero for a frame index.
unsigned getKnownHighZeroBitsForFrameIndex() const {
- return countLeadingZeros(getMaxWaveScratchSize()) + getWavefrontSizeLog2();
+ return llvm::countl_zero(getMaxWaveScratchSize()) + getWavefrontSizeLog2();
}
int getLDSBankCount() const {
@@ -319,10 +327,6 @@ public:
return FP64;
}
- bool hasFastFMAF32() const {
- return FastFMAF32;
- }
-
bool hasHalfRate64Ops() const {
return HalfRate64Ops;
}
@@ -738,6 +742,10 @@ public:
return HasDot9Insts;
}
+ bool hasDot10Insts() const {
+ return HasDot10Insts;
+ }
+
bool hasMAIInsts() const {
return HasMAIInsts;
}
@@ -750,6 +758,10 @@ public:
return HasPkFmacF16Inst;
}
+ bool hasAtomicDsPkAdd16Insts() const { return HasAtomicDsPkAdd16Insts; }
+
+ bool hasAtomicFlatPkAdd16Insts() const { return HasAtomicFlatPkAdd16Insts; }
+
bool hasAtomicFaddInsts() const {
return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts;
}
@@ -758,7 +770,17 @@ public:
bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; }
- bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; }
+ bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const {
+ return HasAtomicBufferGlobalPkAddF16NoRtnInsts;
+ }
+
+ bool hasAtomicBufferGlobalPkAddF16Insts() const {
+ return HasAtomicBufferGlobalPkAddF16Insts;
+ }
+
+ bool hasAtomicGlobalPkAddBF16Inst() const {
+ return HasAtomicGlobalPkAddBF16Inst;
+ }
bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
@@ -924,7 +946,9 @@ public:
bool hasNSAEncoding() const { return HasNSAEncoding; }
- unsigned getNSAMaxSize() const { return NSAMaxSize; }
+ bool hasPartialNSAEncoding() const { return HasPartialNSAEncoding; }
+
+ unsigned getNSAMaxSize() const { return AMDGPU::getNSAMaxSize(*this); }
bool hasGFX10_AEncoding() const {
return GFX10_AEncoding;
@@ -1070,6 +1094,8 @@ public:
bool hasVALUTransUseHazard() const { return HasVALUTransUseHazard; }
+ bool hasForceStoreSC0SC1() const { return HasForceStoreSC0SC1; }
+
bool hasVALUMaskWriteHazard() const { return getGeneration() >= GFX11; }
/// Return if operations acting on VGPR tuples require even alignment.
@@ -1126,6 +1152,9 @@ public:
/// In this case it is readonly.
bool flatScratchIsArchitected() const { return HasArchitectedFlatScratch; }
+ /// \returns true if the architected SGPRs are enabled.
+ bool hasArchitectedSGPRs() const { return HasArchitectedSGPRs; }
+
/// \returns true if the machine has merged shaders in which s0-s7 are
/// reserved by the hardware and user SGPRs start at s8
bool hasMergedShaders() const {
@@ -1323,6 +1352,14 @@ public:
// \returns the number of address arguments from which to enable MIMG NSA
// on supported architectures.
unsigned getNSAThreshold(const MachineFunction &MF) const;
+
+ // \returns true if the subtarget has a hazard requiring an "s_nop 0"
+ // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
+ bool requiresNopBeforeDeallocVGPRs() const {
+ // Currently all targets that support the dealloc VGPRs message also require
+ // the nop.
+ return true;
+ }
};
} // end namespace llvm