diff options
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 196 |
1 files changed, 128 insertions, 68 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 623109733651..5584759e5580 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -63,7 +63,6 @@ private: Triple TargetTriple; protected: - const FeatureBitset &SubtargetFeatureBits; bool Has16BitInsts; bool HasMadMixInsts; bool FP32Denormals; @@ -72,13 +71,15 @@ protected: bool HasVOP3PInsts; bool HasMulI24; bool HasMulU24; + bool HasInv2PiInlineImm; bool HasFminFmaxLegacy; bool EnablePromoteAlloca; + bool HasTrigReducedRange; int LocalMemorySize; unsigned WavefrontSize; public: - AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits); + AMDGPUSubtarget(const Triple &TT); static const AMDGPUSubtarget &get(const MachineFunction &MF); static const AMDGPUSubtarget &get(const TargetMachine &TM, @@ -134,7 +135,7 @@ public: return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv()); } - bool isAmdCodeObjectV2(const Function &F) const { + bool isAmdHsaOrMesa(const Function &F) const { return isAmdHsaOS() || isMesaKernel(F); } @@ -170,10 +171,18 @@ public: return HasMulU24; } + bool hasInv2PiInlineImm() const { + return HasInv2PiInlineImm; + } + bool hasFminFmaxLegacy() const { return HasFminFmaxLegacy; } + bool hasTrigReducedRange() const { + return HasTrigReducedRange; + } + bool isPromoteAllocaEnabled() const { return EnablePromoteAlloca; } @@ -193,38 +202,26 @@ public: /// Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. unsigned getExplicitKernelArgOffset(const Function &F) const { - return isAmdCodeObjectV2(F) ? 0 : 36; + return isAmdHsaOrMesa(F) ? 0 : 36; } /// \returns Maximum number of work groups per compute unit supported by the /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits, - FlatWorkGroupSize); - } + virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0; /// \returns Minimum flat work group size supported by the subtarget. - unsigned getMinFlatWorkGroupSize() const { - return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits); - } + virtual unsigned getMinFlatWorkGroupSize() const = 0; /// \returns Maximum flat work group size supported by the subtarget. - unsigned getMaxFlatWorkGroupSize() const { - return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits); - } + virtual unsigned getMaxFlatWorkGroupSize() const = 0; /// \returns Maximum number of waves per execution unit supported by the /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits, - FlatWorkGroupSize); - } + virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0; /// \returns Minimum number of waves per execution unit supported by the /// subtarget. - unsigned getMinWavesPerEU() const { - return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits); - } + virtual unsigned getMinWavesPerEU() const = 0; unsigned getMaxWavesPerEU() const { return 10; } @@ -266,6 +263,7 @@ public: ISAVersion9_0_2, ISAVersion9_0_4, ISAVersion9_0_6, + ISAVersion9_0_9, }; enum TrapHandlerAbi { @@ -300,6 +298,7 @@ protected: Triple TargetTriple; unsigned Gen; unsigned IsaVersion; + InstrItineraryData InstrItins; int LDSBankCount; unsigned MaxPrivateElementSize; @@ -323,11 +322,11 @@ protected: // Used as options. bool EnableHugePrivateBuffer; - bool EnableVGPRSpilling; bool EnableLoadStoreOpt; bool EnableUnsafeDSOffsetFolding; bool EnableSIScheduler; bool EnableDS128; + bool EnablePRTStrictNull; bool DumpCode; // Subtarget statically properties set by tablegen @@ -337,6 +336,7 @@ protected: bool IsGCN; bool GCN3Encoding; bool CIInsts; + bool VIInsts; bool GFX9Insts; bool SGPRInitBug; bool HasSMemRealTime; @@ -346,15 +346,16 @@ protected: bool HasVGPRIndexMode; bool HasScalarStores; bool HasScalarAtomics; - bool HasInv2PiInlineImm; bool HasSDWAOmod; bool HasSDWAScalar; bool HasSDWASdst; bool HasSDWAMac; bool HasSDWAOutModsVOPC; bool HasDPP; + bool HasR128A16; bool HasDLInsts; - bool D16PreservesUnusedBits; + bool HasDotInsts; + bool EnableSRAMECC; bool FlatAddressSpace; bool FlatInstOffsets; bool FlatGlobalInsts; @@ -372,7 +373,6 @@ protected: bool FeatureDisable; SelectionDAGTargetInfo TSInfo; - AMDGPUAS AS; private: SIInstrInfo InstrInfo; SITargetLowering TLInfo; @@ -423,6 +423,10 @@ public: return &TSInfo; } + const InstrItineraryData *getInstrItineraryData() const override { + return &InstrItins; + } + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); Generation getGeneration() const { @@ -441,10 +445,6 @@ public: return MaxPrivateElementSize; } - AMDGPUAS getAMDGPUAS() const { - return AS; - } - bool hasIntClamp() const { return HasIntClamp; } @@ -517,6 +517,10 @@ public: return FMA; } + bool hasSwap() const { + return GFX9Insts; + } + TrapHandlerAbi getTrapHandlerAbi() const { return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone; } @@ -574,12 +578,19 @@ public: return getGeneration() < AMDGPUSubtarget::GFX9; } + /// \returns If target requires PRT Struct NULL support (zero result registers + /// for sparse texture support). + bool usePRTStrictNull() const { + return EnablePRTStrictNull; + } + bool hasAutoWaitcntBeforeBarrier() const { return AutoWaitcntBeforeBarrier; } bool hasCodeObjectV3() const { - return CodeObjectV3; + // FIXME: Need to add code object v3 support for mesa and pal. + return isAmdHsaOS() ? CodeObjectV3 : false; } bool hasUnalignedBufferAccess() const { @@ -677,8 +688,12 @@ public: return HasDLInsts; } - bool d16PreservesUnusedBits() const { - return D16PreservesUnusedBits; + bool hasDotInsts() const { + return HasDotInsts; + } + + bool isSRAMECCEnabled() const { + return EnableSRAMECC; } // Scratch is allocated in 256 dword per wave blocks for the entire @@ -707,20 +722,19 @@ public: /// \returns Number of execution units per compute unit supported by the /// subtarget. unsigned getEUsPerCU() const { - return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getEUsPerCU(this); } /// \returns Maximum number of waves per compute unit supported by the /// subtarget without any kind of limitation. unsigned getMaxWavesPerCU() const { - return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getMaxWavesPerCU(this); } /// \returns Maximum number of waves per compute unit supported by the /// subtarget and limited by given \p FlatWorkGroupSize. unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(), - FlatWorkGroupSize); + return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize); } /// \returns Maximum number of waves per execution unit supported by the @@ -732,8 +746,7 @@ public: /// \returns Number of waves per work group supported by the subtarget and /// limited by given \p FlatWorkGroupSize. unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getWavesPerWorkGroup( - MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize); + return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize); } // static wrappers @@ -747,8 +760,6 @@ public: void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override; - bool isVGPRSpillingEnabled(const Function &F) const; - unsigned getMaxNumUserSGPRs() const { return 16; } @@ -781,14 +792,15 @@ public: return HasScalarAtomics; } - bool hasInv2PiInlineImm() const { - return HasInv2PiInlineImm; - } bool hasDPP() const { return HasDPP; } + bool hasR128A16() const { + return HasR128A16; + } + bool enableSIScheduler() const { return EnableSIScheduler; } @@ -817,6 +829,11 @@ public: return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; } + // \returns true if the subtarget supports DWORDX3 load/store instructions. + bool hasDwordx3LoadStores() const { + return CIInsts; + } + bool hasSMovFedHazard() const { return getGeneration() >= AMDGPUSubtarget::GFX9; } @@ -851,39 +868,34 @@ public: /// \returns SGPR allocation granularity supported by the subtarget. unsigned getSGPRAllocGranule() const { - return AMDGPU::IsaInfo::getSGPRAllocGranule( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getSGPRAllocGranule(this); } /// \returns SGPR encoding granularity supported by the subtarget. unsigned getSGPREncodingGranule() const { - return AMDGPU::IsaInfo::getSGPREncodingGranule( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getSGPREncodingGranule(this); } /// \returns Total number of SGPRs supported by the subtarget. unsigned getTotalNumSGPRs() const { - return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getTotalNumSGPRs(this); } /// \returns Addressable number of SGPRs supported by the subtarget. unsigned getAddressableNumSGPRs() const { - return AMDGPU::IsaInfo::getAddressableNumSGPRs( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getAddressableNumSGPRs(this); } /// \returns Minimum number of SGPRs that meets the given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMinNumSGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(), - WavesPerEU); + return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU); } /// \returns Maximum number of SGPRs that meets the given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { - return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(), - WavesPerEU, Addressable); + return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable); } /// \returns Reserved number of SGPRs for given function \p MF. @@ -901,39 +913,34 @@ public: /// \returns VGPR allocation granularity supported by the subtarget. unsigned getVGPRAllocGranule() const { - return AMDGPU::IsaInfo::getVGPRAllocGranule( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getVGPRAllocGranule(this); } /// \returns VGPR encoding granularity supported by the subtarget. unsigned getVGPREncodingGranule() const { - return AMDGPU::IsaInfo::getVGPREncodingGranule( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getVGPREncodingGranule(this); } /// \returns Total number of VGPRs supported by the subtarget. unsigned getTotalNumVGPRs() const { - return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getTotalNumVGPRs(this); } /// \returns Addressable number of VGPRs supported by the subtarget. unsigned getAddressableNumVGPRs() const { - return AMDGPU::IsaInfo::getAddressableNumVGPRs( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getAddressableNumVGPRs(this); } /// \returns Minimum number of VGPRs that meets given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMinNumVGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(), - WavesPerEU); + return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU); } /// \returns Maximum number of VGPRs that meets given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(), - WavesPerEU); + return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU); } /// \returns Maximum number of VGPRs that meets number of waves per execution @@ -949,6 +956,34 @@ public: void getPostRAMutations( std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const override; + + /// \returns Maximum number of work groups per compute unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); + } + + /// \returns Minimum flat work group size supported by the subtarget. + unsigned getMinFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); + } + + /// \returns Maximum flat work group size supported by the subtarget. + unsigned getMaxFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); + } + + /// \returns Maximum number of waves per execution unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize); + } + + /// \returns Minimum number of waves per execution unit supported by the + /// subtarget. + unsigned getMinWavesPerEU() const override { + return AMDGPU::IsaInfo::getMinWavesPerEU(this); + } }; class R600Subtarget final : public R600GenSubtargetInfo, @@ -968,7 +1003,6 @@ private: R600TargetLowering TLInfo; InstrItineraryData InstrItins; SelectionDAGTargetInfo TSInfo; - AMDGPUAS AS; public: R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, @@ -1053,8 +1087,6 @@ public: short getTexVTXClauseSize() const { return TexVTXClauseSize; } - AMDGPUAS getAMDGPUAS() const { return AS; } - bool enableMachineScheduler() const override { return true; } @@ -1062,6 +1094,34 @@ public: bool enableSubRegLiveness() const override { return true; } + + /// \returns Maximum number of work groups per compute unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); + } + + /// \returns Minimum flat work group size supported by the subtarget. + unsigned getMinFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); + } + + /// \returns Maximum flat work group size supported by the subtarget. + unsigned getMaxFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); + } + + /// \returns Maximum number of waves per execution unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize); + } + + /// \returns Minimum number of waves per execution unit supported by the + /// subtarget. + unsigned getMinWavesPerEU() const override { + return AMDGPU::IsaInfo::getMinWavesPerEU(this); + } }; } // end namespace llvm |