1 files changed, 128 insertions, 68 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 623109733651..5584759e5580 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -63,7 +63,6 @@ private:
   Triple TargetTriple;
 
 protected:
-  const FeatureBitset &SubtargetFeatureBits;
   bool Has16BitInsts;
   bool HasMadMixInsts;
   bool FP32Denormals;
@@ -72,13 +71,15 @@ protected:
   bool HasVOP3PInsts;
   bool HasMulI24;
   bool HasMulU24;
+  bool HasInv2PiInlineImm;
   bool HasFminFmaxLegacy;
   bool EnablePromoteAlloca;
+  bool HasTrigReducedRange;
   int LocalMemorySize;
   unsigned WavefrontSize;
 
 public:
-  AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
+  AMDGPUSubtarget(const Triple &TT);
 
   static const AMDGPUSubtarget &get(const MachineFunction &MF);
   static const AMDGPUSubtarget &get(const TargetMachine &TM,
@@ -134,7 +135,7 @@ public:
     return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
   }
 
-  bool isAmdCodeObjectV2(const Function &F) const {
+  bool isAmdHsaOrMesa(const Function &F) const {
     return isAmdHsaOS() || isMesaKernel(F);
   }
 
@@ -170,10 +171,18 @@ public:
     return HasMulU24;
   }
 
+  bool hasInv2PiInlineImm() const {
+    return HasInv2PiInlineImm;
+  }
+
   bool hasFminFmaxLegacy() const {
     return HasFminFmaxLegacy;
   }
 
+  bool hasTrigReducedRange() const {
+    return HasTrigReducedRange;
+  }
+
   bool isPromoteAllocaEnabled() const {
     return EnablePromoteAlloca;
   }
@@ -193,38 +202,26 @@ public:
   /// Returns the offset in bytes from the start of the input buffer
   ///        of the first explicit kernel argument.
   unsigned getExplicitKernelArgOffset(const Function &F) const {
-    return isAmdCodeObjectV2(F) ? 0 : 36;
+    return isAmdHsaOrMesa(F) ? 0 : 36;
   }
 
   /// \returns Maximum number of work groups per compute unit supported by the
   /// subtarget and limited by given \p FlatWorkGroupSize.
-  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
-    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
-                                                  FlatWorkGroupSize);
-  }
+  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
 
   /// \returns Minimum flat work group size supported by the subtarget.
-  unsigned getMinFlatWorkGroupSize() const {
-    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
-  }
+  virtual unsigned getMinFlatWorkGroupSize() const = 0;
 
   /// \returns Maximum flat work group size supported by the subtarget.
-  unsigned getMaxFlatWorkGroupSize() const {
-    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
-  }
+  virtual unsigned getMaxFlatWorkGroupSize() const = 0;
 
   /// \returns Maximum number of waves per execution unit supported by the
   /// subtarget and limited by given \p FlatWorkGroupSize.
-  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
-    return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
-                                             FlatWorkGroupSize);
-  }
+  virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const  = 0;
 
   /// \returns Minimum number of waves per execution unit supported by the
   /// subtarget.
-  unsigned getMinWavesPerEU() const {
-    return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
-  }
+  virtual unsigned getMinWavesPerEU() const = 0;
 
   unsigned getMaxWavesPerEU() const { return 10; }
 
@@ -266,6 +263,7 @@ public:
     ISAVersion9_0_2,
     ISAVersion9_0_4,
     ISAVersion9_0_6,
+    ISAVersion9_0_9,
   };
 
   enum TrapHandlerAbi {
@@ -300,6 +298,7 @@ protected:
   Triple TargetTriple;
   unsigned Gen;
   unsigned IsaVersion;
+  InstrItineraryData InstrItins;
   int LDSBankCount;
   unsigned MaxPrivateElementSize;
 
@@ -323,11 +322,11 @@ protected:
 
   // Used as options.
   bool EnableHugePrivateBuffer;
-  bool EnableVGPRSpilling;
   bool EnableLoadStoreOpt;
   bool EnableUnsafeDSOffsetFolding;
   bool EnableSIScheduler;
   bool EnableDS128;
+  bool EnablePRTStrictNull;
   bool DumpCode;
 
   // Subtarget statically properties set by tablegen
@@ -337,6 +336,7 @@ protected:
   bool IsGCN;
   bool GCN3Encoding;
   bool CIInsts;
+  bool VIInsts;
   bool GFX9Insts;
   bool SGPRInitBug;
   bool HasSMemRealTime;
@@ -346,15 +346,16 @@ protected:
   bool HasVGPRIndexMode;
   bool HasScalarStores;
   bool HasScalarAtomics;
-  bool HasInv2PiInlineImm;
   bool HasSDWAOmod;
   bool HasSDWAScalar;
   bool HasSDWASdst;
   bool HasSDWAMac;
   bool HasSDWAOutModsVOPC;
   bool HasDPP;
+  bool HasR128A16;
   bool HasDLInsts;
-  bool D16PreservesUnusedBits;
+  bool HasDotInsts;
+  bool EnableSRAMECC;
   bool FlatAddressSpace;
   bool FlatInstOffsets;
   bool FlatGlobalInsts;
@@ -372,7 +373,6 @@ protected:
   bool FeatureDisable;
 
   SelectionDAGTargetInfo TSInfo;
-  AMDGPUAS AS;
 private:
   SIInstrInfo InstrInfo;
   SITargetLowering TLInfo;
@@ -423,6 +423,10 @@ public:
     return &TSInfo;
   }
 
+  const InstrItineraryData *getInstrItineraryData() const override {
+    return &InstrItins;
+  }
+
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
   Generation getGeneration() const {
@@ -441,10 +445,6 @@ public:
     return MaxPrivateElementSize;
   }
 
-  AMDGPUAS getAMDGPUAS() const {
-    return AS;
-  }
-
   bool hasIntClamp() const {
     return HasIntClamp;
   }
@@ -517,6 +517,10 @@ public:
     return FMA;
   }
 
+  bool hasSwap() const {
+    return GFX9Insts;
+  }
+
   TrapHandlerAbi getTrapHandlerAbi() const {
     return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
   }
@@ -574,12 +578,19 @@ public:
     return getGeneration() < AMDGPUSubtarget::GFX9;
   }
 
+  /// \returns If target requires PRT Strict NULL support (zero result registers
+  /// for sparse texture support).
+  bool usePRTStrictNull() const {
+    return EnablePRTStrictNull;
+  }
+
   bool hasAutoWaitcntBeforeBarrier() const {
     return AutoWaitcntBeforeBarrier;
   }
 
   bool hasCodeObjectV3() const {
-    return CodeObjectV3;
+    // FIXME: Need to add code object v3 support for mesa and pal.
+    return isAmdHsaOS() ? CodeObjectV3 : false;
   }
 
   bool hasUnalignedBufferAccess() const {
@@ -677,8 +688,12 @@ public:
     return HasDLInsts;
   }
 
-  bool d16PreservesUnusedBits() const {
-    return D16PreservesUnusedBits;
+  bool hasDotInsts() const {
+    return HasDotInsts;
+  }
+
+  bool isSRAMECCEnabled() const {
+    return EnableSRAMECC;
   }
 
   // Scratch is allocated in 256 dword per wave blocks for the entire
@@ -707,20 +722,19 @@ public:
   /// \returns Number of execution units per compute unit supported by the
   /// subtarget.
   unsigned getEUsPerCU() const {
-    return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
+    return AMDGPU::IsaInfo::getEUsPerCU(this);
   }
 
   /// \returns Maximum number of waves per compute unit supported by the
   /// subtarget without any kind of limitation.
   unsigned getMaxWavesPerCU() const {
-    return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
+    return AMDGPU::IsaInfo::getMaxWavesPerCU(this);
   }
 
   /// \returns Maximum number of waves per compute unit supported by the
   /// subtarget and limited by given \p FlatWorkGroupSize.
   unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
-    return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
-                                             FlatWorkGroupSize);
+    return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
   }
 
   /// \returns Maximum number of waves per execution unit supported by the
@@ -732,8 +746,7 @@ public:
   /// \returns Number of waves per work group supported by the subtarget and
   /// limited by given \p FlatWorkGroupSize.
   unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
-    return AMDGPU::IsaInfo::getWavesPerWorkGroup(
-        MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
+    return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
   }
 
   // static wrappers
@@ -747,8 +760,6 @@ public:
   void overrideSchedPolicy(MachineSchedPolicy &Policy,
                            unsigned NumRegionInstrs) const override;
 
-  bool isVGPRSpillingEnabled(const Function &F) const;
-
   unsigned getMaxNumUserSGPRs() const {
     return 16;
   }
@@ -781,14 +792,15 @@ public:
     return HasScalarAtomics;
   }
 
-  bool hasInv2PiInlineImm() const {
-    return HasInv2PiInlineImm;
-  }
 
   bool hasDPP() const {
     return HasDPP;
   }
 
+  bool hasR128A16() const {
+    return HasR128A16;
+  }
+
   bool enableSIScheduler() const {
     return EnableSIScheduler;
   }
@@ -817,6 +829,11 @@ public:
     return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
   }
 
+  /// \returns true if the subtarget supports DWORDX3 load/store instructions.
+  bool hasDwordx3LoadStores() const {
+    return CIInsts;
+  }
+
   bool hasSMovFedHazard() const {
     return getGeneration() >= AMDGPUSubtarget::GFX9;
   }
@@ -851,39 +868,34 @@ public:
 
   /// \returns SGPR allocation granularity supported by the subtarget.
   unsigned getSGPRAllocGranule() const {
-    return AMDGPU::IsaInfo::getSGPRAllocGranule(
-        MCSubtargetInfo::getFeatureBits());
+    return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
   }
 
   /// \returns SGPR encoding granularity supported by the subtarget.
   unsigned getSGPREncodingGranule() const {
-    return AMDGPU::IsaInfo::getSGPREncodingGranule(
-        MCSubtargetInfo::getFeatureBits());
+    return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
   }
 
   /// \returns Total number of SGPRs supported by the subtarget.
   unsigned getTotalNumSGPRs() const {
-    return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
+    return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
   }
 
   /// \returns Addressable number of SGPRs supported by the subtarget.
   unsigned getAddressableNumSGPRs() const {
-    return AMDGPU::IsaInfo::getAddressableNumSGPRs(
-        MCSubtargetInfo::getFeatureBits());
+    return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
   }
 
   /// \returns Minimum number of SGPRs that meets the given number of waves per
   /// execution unit requirement supported by the subtarget.
   unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
-    return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
-                                           WavesPerEU);
+    return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
   }
 
   /// \returns Maximum number of SGPRs that meets the given number of waves per
   /// execution unit requirement supported by the subtarget.
   unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
-    return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
-                                           WavesPerEU, Addressable);
+    return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
   }
 
   /// \returns Reserved number of SGPRs for given function \p MF.
@@ -901,39 +913,34 @@ public:
 
   /// \returns VGPR allocation granularity supported by the subtarget.
   unsigned getVGPRAllocGranule() const {
-    return AMDGPU::IsaInfo::getVGPRAllocGranule(
-        MCSubtargetInfo::getFeatureBits());
+    return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
   }
 
   /// \returns VGPR encoding granularity supported by the subtarget.
   unsigned getVGPREncodingGranule() const {
-    return AMDGPU::IsaInfo::getVGPREncodingGranule(
-        MCSubtargetInfo::getFeatureBits());
+    return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
   }
 
   /// \returns Total number of VGPRs supported by the subtarget.
   unsigned getTotalNumVGPRs() const {
-    return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
+    return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
   }
 
   /// \returns Addressable number of VGPRs supported by the subtarget.
   unsigned getAddressableNumVGPRs() const {
-    return AMDGPU::IsaInfo::getAddressableNumVGPRs(
-        MCSubtargetInfo::getFeatureBits());
+    return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
   }
 
   /// \returns Minimum number of VGPRs that meets given number of waves per
   /// execution unit requirement supported by the subtarget.
   unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
-    return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
-                                           WavesPerEU);
+    return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
   }
 
   /// \returns Maximum number of VGPRs that meets given number of waves per
   /// execution unit requirement supported by the subtarget.
   unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
-    return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
-                                           WavesPerEU);
+    return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
   }
 
   /// \returns Maximum number of VGPRs that meets number of waves per execution
@@ -949,6 +956,34 @@ public:
   void getPostRAMutations(
       std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
       const override;
+
+  /// \returns Maximum number of work groups per compute unit supported by the
+  /// subtarget and limited by given \p FlatWorkGroupSize.
+  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
+    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
+  }
+
+  /// \returns Minimum flat work group size supported by the subtarget.
+  unsigned getMinFlatWorkGroupSize() const override {
+    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
+  }
+
+  /// \returns Maximum flat work group size supported by the subtarget.
+  unsigned getMaxFlatWorkGroupSize() const override {
+    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
+  }
+
+  /// \returns Maximum number of waves per execution unit supported by the
+  /// subtarget and limited by given \p FlatWorkGroupSize.
+  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
+    return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
+  }
+
+  /// \returns Minimum number of waves per execution unit supported by the
+  /// subtarget.
+  unsigned getMinWavesPerEU() const override {
+    return AMDGPU::IsaInfo::getMinWavesPerEU(this);
+  }
 };
 
 class R600Subtarget final : public R600GenSubtargetInfo,
@@ -968,7 +1003,6 @@ private:
   R600TargetLowering TLInfo;
   InstrItineraryData InstrItins;
   SelectionDAGTargetInfo TSInfo;
-  AMDGPUAS AS;
 
 public:
   R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
@@ -1053,8 +1087,6 @@ public:
 
   short getTexVTXClauseSize() const { return TexVTXClauseSize; }
 
-  AMDGPUAS getAMDGPUAS() const { return AS; }
-
   bool enableMachineScheduler() const override {
     return true;
   }
@@ -1062,6 +1094,34 @@ public:
   bool enableSubRegLiveness() const override {
     return true;
   }
+
+  /// \returns Maximum number of work groups per compute unit supported by the
+  /// subtarget and limited by given \p FlatWorkGroupSize.
+  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
+    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
+  }
+
+  /// \returns Minimum flat work group size supported by the subtarget.
+  unsigned getMinFlatWorkGroupSize() const override {
+    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
+  }
+
+  /// \returns Maximum flat work group size supported by the subtarget.
+  unsigned getMaxFlatWorkGroupSize() const override {
+    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
+  }
+
+  /// \returns Maximum number of waves per execution unit supported by the
+  /// subtarget and limited by given \p FlatWorkGroupSize.
+  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
+    return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
+  }
+
+  /// \returns Minimum number of waves per execution unit supported by the
+  /// subtarget.
+  unsigned getMinWavesPerEU() const override {
+    return AMDGPU::IsaInfo::getMinWavesPerEU(this);
+  }
 };
 
 } // end namespace llvm