aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h')
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 196
1 files changed, 128 insertions, 68 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 623109733651..5584759e5580 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -63,7 +63,6 @@ private:
Triple TargetTriple;
protected:
- const FeatureBitset &SubtargetFeatureBits;
bool Has16BitInsts;
bool HasMadMixInsts;
bool FP32Denormals;
@@ -72,13 +71,15 @@ protected:
bool HasVOP3PInsts;
bool HasMulI24;
bool HasMulU24;
+ bool HasInv2PiInlineImm;
bool HasFminFmaxLegacy;
bool EnablePromoteAlloca;
+ bool HasTrigReducedRange;
int LocalMemorySize;
unsigned WavefrontSize;
public:
- AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
+ AMDGPUSubtarget(const Triple &TT);
static const AMDGPUSubtarget &get(const MachineFunction &MF);
static const AMDGPUSubtarget &get(const TargetMachine &TM,
@@ -134,7 +135,7 @@ public:
return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
}
- bool isAmdCodeObjectV2(const Function &F) const {
+ bool isAmdHsaOrMesa(const Function &F) const {
return isAmdHsaOS() || isMesaKernel(F);
}
@@ -170,10 +171,18 @@ public:
return HasMulU24;
}
+ bool hasInv2PiInlineImm() const {
+ return HasInv2PiInlineImm;
+ }
+
bool hasFminFmaxLegacy() const {
return HasFminFmaxLegacy;
}
+ bool hasTrigReducedRange() const {
+ return HasTrigReducedRange;
+ }
+
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
@@ -193,38 +202,26 @@ public:
/// Returns the offset in bytes from the start of the input buffer
/// of the first explicit kernel argument.
unsigned getExplicitKernelArgOffset(const Function &F) const {
- return isAmdCodeObjectV2(F) ? 0 : 36;
+ return isAmdHsaOrMesa(F) ? 0 : 36;
}
/// \returns Maximum number of work groups per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
- FlatWorkGroupSize);
- }
+ virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
/// \returns Minimum flat work group size supported by the subtarget.
- unsigned getMinFlatWorkGroupSize() const {
- return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
- }
+ virtual unsigned getMinFlatWorkGroupSize() const = 0;
/// \returns Maximum flat work group size supported by the subtarget.
- unsigned getMaxFlatWorkGroupSize() const {
- return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
- }
+ virtual unsigned getMaxFlatWorkGroupSize() const = 0;
/// \returns Maximum number of waves per execution unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
- FlatWorkGroupSize);
- }
+ virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
/// \returns Minimum number of waves per execution unit supported by the
/// subtarget.
- unsigned getMinWavesPerEU() const {
- return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
- }
+ virtual unsigned getMinWavesPerEU() const = 0;
unsigned getMaxWavesPerEU() const { return 10; }
@@ -266,6 +263,7 @@ public:
ISAVersion9_0_2,
ISAVersion9_0_4,
ISAVersion9_0_6,
+ ISAVersion9_0_9,
};
enum TrapHandlerAbi {
@@ -300,6 +298,7 @@ protected:
Triple TargetTriple;
unsigned Gen;
unsigned IsaVersion;
+ InstrItineraryData InstrItins;
int LDSBankCount;
unsigned MaxPrivateElementSize;
@@ -323,11 +322,11 @@ protected:
// Used as options.
bool EnableHugePrivateBuffer;
- bool EnableVGPRSpilling;
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
bool EnableSIScheduler;
bool EnableDS128;
+ bool EnablePRTStrictNull;
bool DumpCode;
// Subtarget statically properties set by tablegen
@@ -337,6 +336,7 @@ protected:
bool IsGCN;
bool GCN3Encoding;
bool CIInsts;
+ bool VIInsts;
bool GFX9Insts;
bool SGPRInitBug;
bool HasSMemRealTime;
@@ -346,15 +346,16 @@ protected:
bool HasVGPRIndexMode;
bool HasScalarStores;
bool HasScalarAtomics;
- bool HasInv2PiInlineImm;
bool HasSDWAOmod;
bool HasSDWAScalar;
bool HasSDWASdst;
bool HasSDWAMac;
bool HasSDWAOutModsVOPC;
bool HasDPP;
+ bool HasR128A16;
bool HasDLInsts;
- bool D16PreservesUnusedBits;
+ bool HasDotInsts;
+ bool EnableSRAMECC;
bool FlatAddressSpace;
bool FlatInstOffsets;
bool FlatGlobalInsts;
@@ -372,7 +373,6 @@ protected:
bool FeatureDisable;
SelectionDAGTargetInfo TSInfo;
- AMDGPUAS AS;
private:
SIInstrInfo InstrInfo;
SITargetLowering TLInfo;
@@ -423,6 +423,10 @@ public:
return &TSInfo;
}
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
Generation getGeneration() const {
@@ -441,10 +445,6 @@ public:
return MaxPrivateElementSize;
}
- AMDGPUAS getAMDGPUAS() const {
- return AS;
- }
-
bool hasIntClamp() const {
return HasIntClamp;
}
@@ -517,6 +517,10 @@ public:
return FMA;
}
+ bool hasSwap() const {
+ return GFX9Insts;
+ }
+
TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}
@@ -574,12 +578,19 @@ public:
return getGeneration() < AMDGPUSubtarget::GFX9;
}
+ /// \returns True if the target requires PRT Strict NULL support (zero result
+ /// registers for sparse texture support).
+ bool usePRTStrictNull() const {
+ return EnablePRTStrictNull;
+ }
+
bool hasAutoWaitcntBeforeBarrier() const {
return AutoWaitcntBeforeBarrier;
}
bool hasCodeObjectV3() const {
- return CodeObjectV3;
+ // FIXME: Need to add code object v3 support for mesa and pal.
+ return isAmdHsaOS() ? CodeObjectV3 : false;
}
bool hasUnalignedBufferAccess() const {
@@ -677,8 +688,12 @@ public:
return HasDLInsts;
}
- bool d16PreservesUnusedBits() const {
- return D16PreservesUnusedBits;
+ bool hasDotInsts() const {
+ return HasDotInsts;
+ }
+
+ bool isSRAMECCEnabled() const {
+ return EnableSRAMECC;
}
// Scratch is allocated in 256 dword per wave blocks for the entire
@@ -707,20 +722,19 @@ public:
/// \returns Number of execution units per compute unit supported by the
/// subtarget.
unsigned getEUsPerCU() const {
- return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getEUsPerCU(this);
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerCU() const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(this);
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
- FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
}
/// \returns Maximum number of waves per execution unit supported by the
@@ -732,8 +746,7 @@ public:
/// \returns Number of waves per work group supported by the subtarget and
/// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getWavesPerWorkGroup(
- MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
}
// static wrappers
@@ -747,8 +760,6 @@ public:
void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const override;
- bool isVGPRSpillingEnabled(const Function &F) const;
-
unsigned getMaxNumUserSGPRs() const {
return 16;
}
@@ -781,14 +792,15 @@ public:
return HasScalarAtomics;
}
- bool hasInv2PiInlineImm() const {
- return HasInv2PiInlineImm;
- }
bool hasDPP() const {
return HasDPP;
}
+ bool hasR128A16() const {
+ return HasR128A16;
+ }
+
bool enableSIScheduler() const {
return EnableSIScheduler;
}
@@ -817,6 +829,11 @@ public:
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
+ /// \returns true if the subtarget supports DWORDX3 load/store instructions.
+ bool hasDwordx3LoadStores() const {
+ return CIInsts;
+ }
+
bool hasSMovFedHazard() const {
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
@@ -851,39 +868,34 @@ public:
/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getSGPRAllocGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
}
/// \returns SGPR encoding granularity supported by the subtarget.
unsigned getSGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getSGPREncodingGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
}
/// \returns Total number of SGPRs supported by the subtarget.
unsigned getTotalNumSGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
}
/// \returns Addressable number of SGPRs supported by the subtarget.
unsigned getAddressableNumSGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumSGPRs(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
}
/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU);
+ return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
}
/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
- return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU, Addressable);
+ return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
}
/// \returns Reserved number of SGPRs for given function \p MF.
@@ -901,39 +913,34 @@ public:
/// \returns VGPR allocation granularity supported by the subtarget.
unsigned getVGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getVGPRAllocGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
}
/// \returns VGPR encoding granularity supported by the subtarget.
unsigned getVGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getVGPREncodingGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
}
/// \returns Total number of VGPRs supported by the subtarget.
unsigned getTotalNumVGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
}
/// \returns Addressable number of VGPRs supported by the subtarget.
unsigned getAddressableNumVGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumVGPRs(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
}
/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU);
+ return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU);
+ return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets number of waves per execution
@@ -949,6 +956,34 @@ public:
void getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const override;
+
+ /// \returns Maximum number of work groups per compute unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum flat work group size supported by the subtarget.
+ unsigned getMinFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum flat work group size supported by the subtarget.
+ unsigned getMaxFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum number of waves per execution unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum number of waves per execution unit supported by the
+ /// subtarget.
+ unsigned getMinWavesPerEU() const override {
+ return AMDGPU::IsaInfo::getMinWavesPerEU(this);
+ }
};
class R600Subtarget final : public R600GenSubtargetInfo,
@@ -968,7 +1003,6 @@ private:
R600TargetLowering TLInfo;
InstrItineraryData InstrItins;
SelectionDAGTargetInfo TSInfo;
- AMDGPUAS AS;
public:
R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
@@ -1053,8 +1087,6 @@ public:
short getTexVTXClauseSize() const { return TexVTXClauseSize; }
- AMDGPUAS getAMDGPUAS() const { return AS; }
-
bool enableMachineScheduler() const override {
return true;
}
@@ -1062,6 +1094,34 @@ public:
bool enableSubRegLiveness() const override {
return true;
}
+
+ /// \returns Maximum number of work groups per compute unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum flat work group size supported by the subtarget.
+ unsigned getMinFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum flat work group size supported by the subtarget.
+ unsigned getMaxFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum number of waves per execution unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum number of waves per execution unit supported by the
+ /// subtarget.
+ unsigned getMinWavesPerEU() const override {
+ return AMDGPU::IsaInfo::getMinWavesPerEU(this);
+ }
};
} // end namespace llvm