author     Dimitry Andric <dim@FreeBSD.org>    2019-10-23 17:51:42 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2019-10-23 17:51:42 +0000
commit     1d5ae1026e831016fc29fd927877c86af904481f (patch)
tree       2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /lib/Target/AMDGPU/SIInstrInfo.td
parent     e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff)
download   src-1d5ae1026e831016fc29fd927877c86af904481f.tar.gz
           src-1d5ae1026e831016fc29fd927877c86af904481f.zip

Vendor import of stripped llvm trunk r375505, the last commit before the
upstream Subversion repository was made read-only, and the LLVM project
migrated to GitHub: https://llvm.org/svn/llvm-project/llvm/trunk@375505
(tags: vendor/llvm/llvm-trunk-r375505, vendor/llvm)

Notes:
    svn path=/vendor/llvm/dist/; revision=353940
    svn path=/vendor/llvm/llvm-r375505/; revision=353941; tag=vendor/llvm/llvm-trunk-r375505
Diffstat (limited to 'lib/Target/AMDGPU/SIInstrInfo.td')
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.td | 320
1 file changed, 211 insertions(+), 109 deletions(-)
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index c382c816e0b4..1eecbf555613 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -84,7 +84,7 @@ def SDTtbuffer_load : SDTypeProfile<1, 8,
SDTCisVT<4, i32>, // soffset(SGPR)
SDTCisVT<5, i32>, // offset(imm)
SDTCisVT<6, i32>, // format(imm)
- SDTCisVT<7, i32>, // cachecontrol(imm)
+ SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm)
SDTCisVT<8, i1> // idxen(imm)
]>;
@@ -102,7 +102,7 @@ def SDTtbuffer_store : SDTypeProfile<0, 9,
SDTCisVT<4, i32>, // soffset(SGPR)
SDTCisVT<5, i32>, // offset(imm)
SDTCisVT<6, i32>, // format(imm)
- SDTCisVT<7, i32>, // cachecontrol(imm)
+ SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm)
SDTCisVT<8, i1> // idxen(imm)
]>;
@@ -119,7 +119,7 @@ def SDTBufferLoad : SDTypeProfile<1, 7,
SDTCisVT<3, i32>, // voffset(VGPR)
SDTCisVT<4, i32>, // soffset(SGPR)
SDTCisVT<5, i32>, // offset(imm)
- SDTCisVT<6, i32>, // cachepolicy(imm)
+ SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm)
SDTCisVT<7, i1>]>; // idxen(imm)
def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
@@ -145,7 +145,7 @@ def SDTBufferStore : SDTypeProfile<0, 8,
SDTCisVT<3, i32>, // voffset(VGPR)
SDTCisVT<4, i32>, // soffset(SGPR)
SDTCisVT<5, i32>, // offset(imm)
- SDTCisVT<6, i32>, // cachepolicy(imm)
+ SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm)
SDTCisVT<7, i1>]>; // idxen(imm)
def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
@@ -198,6 +198,8 @@ def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
+def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
+def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
def SIbuffer_atomic_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_FADD", f32>;
def SIbuffer_atomic_pk_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_PK_FADD", v2f16>;
@@ -264,6 +266,11 @@ def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
[SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;
+def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
+ SDTypeProfile<0 ,1, [SDTCisInt<0>]>,
+ [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]
+>;
+
//===----------------------------------------------------------------------===//
// ValueType helpers
//===----------------------------------------------------------------------===//
@@ -277,7 +284,9 @@ class isFloatType<ValueType SrcVT> {
!if(!eq(SrcVT.Value, f64.Value), 1,
!if(!eq(SrcVT.Value, v2f16.Value), 1,
!if(!eq(SrcVT.Value, v4f16.Value), 1,
- 0)))));
+ !if(!eq(SrcVT.Value, v2f32.Value), 1,
+ !if(!eq(SrcVT.Value, v2f64.Value), 1,
+ 0)))))));
}
class isIntType<ValueType SrcVT> {
@@ -300,14 +309,36 @@ class isPackedType<ValueType SrcVT> {
// PatFrags for global memory operations
//===----------------------------------------------------------------------===//
-defm atomic_inc_global : global_binary_atomic_op<SIatomic_inc>;
-defm atomic_dec_global : global_binary_atomic_op<SIatomic_dec>;
+foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
+let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
+
-def atomic_inc_local : local_binary_atomic_op<SIatomic_inc>;
-def atomic_dec_local : local_binary_atomic_op<SIatomic_dec>;
-def atomic_load_fadd_local : local_binary_atomic_op<atomic_load_fadd>;
-def atomic_load_fmin_local : local_binary_atomic_op<SIatomic_fmin>;
-def atomic_load_fmax_local : local_binary_atomic_op<SIatomic_fmax>;
+defm atomic_inc_#as : binary_atomic_op<SIatomic_inc>;
+defm atomic_dec_#as : binary_atomic_op<SIatomic_dec>;
+defm atomic_load_fmin_#as : binary_atomic_op<SIatomic_fmin, 0>;
+defm atomic_load_fmax_#as : binary_atomic_op<SIatomic_fmax, 0>;
+
+
+} // End let AddressSpaces = ...
+} // End foreach AddrSpace
+
+def atomic_fadd_global_noret : PatFrag<
+ (ops node:$ptr, node:$value),
+ (SIglobal_atomic_fadd node:$ptr, node:$value)> {
+ // FIXME: Move this
+ let MemoryVT = f32;
+ let IsAtomic = 1;
+ let AddressSpaces = StoreAddress_global.AddrSpaces;
+}
+
+def atomic_pk_fadd_global_noret : PatFrag<
+ (ops node:$ptr, node:$value),
+ (SIglobal_atomic_pk_fadd node:$ptr, node:$value)> {
+ // FIXME: Move this
+ let MemoryVT = v2f16;
+ let IsAtomic = 1;
+ let AddressSpaces = StoreAddress_global.AddrSpaces;
+}
//===----------------------------------------------------------------------===//
// SDNodes PatFrags for loads/stores with a glue input.
@@ -328,10 +359,12 @@ def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
>;
def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
+ let IsLoad = 1;
let IsUnindexed = 1;
}
def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
+ let IsLoad = 1;
let IsNonExtLoad = 1;
}
@@ -347,14 +380,15 @@ def atomic_load_64_glue : PatFrag<(ops node:$ptr),
let MemoryVT = i64;
}
-def extload_glue : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
+def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
let IsLoad = 1;
let IsAnyExtLoad = 1;
}
-def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr), [{
- return cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
-}]>;
+def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
+ let IsLoad = 1;
+ let IsSignExtLoad = 1;
+}
def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
let IsLoad = 1;
@@ -391,25 +425,50 @@ def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
let MemoryVT = i16;
}
-def load_glue_align8 : Aligned8Bytes <
- (ops node:$ptr), (load_glue node:$ptr)
->;
-def load_glue_align16 : Aligned16Bytes <
- (ops node:$ptr), (load_glue node:$ptr)
->;
+let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
+def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
+ let IsNonExtLoad = 1;
+}
-def load_local_m0 : LoadFrag<load_glue>, LocalAddress;
-def sextloadi8_local_m0 : LoadFrag<sextloadi8_glue>, LocalAddress;
-def sextloadi16_local_m0 : LoadFrag<sextloadi16_glue>, LocalAddress;
-def extloadi8_local_m0 : LoadFrag<extloadi8_glue>, LocalAddress;
-def zextloadi8_local_m0 : LoadFrag<zextloadi8_glue>, LocalAddress;
-def extloadi16_local_m0 : LoadFrag<extloadi16_glue>, LocalAddress;
-def zextloadi16_local_m0 : LoadFrag<zextloadi16_glue>, LocalAddress;
-def load_align8_local_m0 : LoadFrag <load_glue_align8>, LocalAddress;
-def load_align16_local_m0 : LoadFrag <load_glue_align16>, LocalAddress;
-def atomic_load_32_local_m0 : LoadFrag<atomic_load_32_glue>, LocalAddress;
-def atomic_load_64_local_m0 : LoadFrag<atomic_load_64_glue>, LocalAddress;
+let MemoryVT = i8 in {
+def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
+def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
+def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;
+}
+
+let MemoryVT = i16 in {
+def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
+def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
+def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
+}
+
+def load_align8_local_m0 : PatFrag<(ops node:$ptr),
+ (load_local_m0 node:$ptr)> {
+ let IsLoad = 1;
+ let IsNonExtLoad = 1;
+ let MinAlignment = 8;
+}
+def load_align16_local_m0 : PatFrag<(ops node:$ptr),
+ (load_local_m0 node:$ptr)> {
+ let IsLoad = 1;
+ let IsNonExtLoad = 1;
+ let MinAlignment = 16;
+}
+
+} // End IsLoad = 1
+
+let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
+def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
+ (atomic_load_32_glue node:$ptr)> {
+ let MemoryVT = i32;
+}
+def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
+ (atomic_load_64_glue node:$ptr)> {
+ let MemoryVT = i64;
+}
+
+} // End let AddressSpaces = LoadAddress_local.AddrSpaces
def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
@@ -420,50 +479,88 @@ def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;
-def atomic_store_glue : PatFrag<(ops node:$ptr, node:$val),
- (AMDGPUatomic_st_glue node:$ptr, node:$val)> {
-}
-
def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
- (AMDGPUst_glue node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
-}]>;
+ (AMDGPUst_glue node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let IsUnindexed = 1;
+}
def store_glue : PatFrag<(ops node:$val, node:$ptr),
- (unindexedstore_glue node:$val, node:$ptr), [{
- return !cast<StoreSDNode>(N)->isTruncatingStore();
-}]>;
+ (unindexedstore_glue node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let IsTruncStore = 0;
+}
def truncstore_glue : PatFrag<(ops node:$val, node:$ptr),
- (unindexedstore_glue node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->isTruncatingStore();
-}]>;
+ (unindexedstore_glue node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let IsTruncStore = 1;
+}
def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
- (truncstore_glue node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
+ (truncstore_glue node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let MemoryVT = i8;
+}
def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
- (truncstore_glue node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+ (truncstore_glue node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let MemoryVT = i16;
+}
-def store_glue_align8 : Aligned8Bytes <
- (ops node:$value, node:$ptr), (store_glue node:$value, node:$ptr)
->;
+let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
+def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
+ (store_glue node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let IsTruncStore = 0;
+}
-def store_glue_align16 : Aligned16Bytes <
- (ops node:$value, node:$ptr), (store_glue node:$value, node:$ptr)
->;
+def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
+ (unindexedstore_glue node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let MemoryVT = i8;
+}
+
+def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
+ (unindexedstore_glue node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let MemoryVT = i16;
+}
+}
+
+def store_align16_local_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (store_local_m0 node:$value, node:$ptr)> {
+ let IsStore = 1;
+ let IsTruncStore = 0;
+ let MinAlignment = 16;
+}
-def store_local_m0 : StoreFrag<store_glue>, LocalAddress;
-def truncstorei8_local_m0 : StoreFrag<truncstorei8_glue>, LocalAddress;
-def truncstorei16_local_m0 : StoreFrag<truncstorei16_glue>, LocalAddress;
-def atomic_store_local_m0 : StoreFrag<AMDGPUatomic_st_glue>, LocalAddress;
+def store_align8_local_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (store_local_m0 node:$value, node:$ptr)> {
+ let IsStore = 1;
+ let IsTruncStore = 0;
+ let MinAlignment = 8;
+}
+
+let AddressSpaces = StoreAddress_local.AddrSpaces in {
+
+def atomic_store_local_32_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i32;
+}
+def atomic_store_local_64_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i64;
+}
+} // End let AddressSpaces = StoreAddress_local.AddrSpaces
-def store_align8_local_m0 : StoreFrag<store_glue_align8>, LocalAddress;
-def store_align16_local_m0 : StoreFrag<store_glue_align16>, LocalAddress;
def si_setcc_uniform : PatFrag <
(ops node:$lhs, node:$rhs, node:$cond),
@@ -539,16 +636,27 @@ def lshl_rev : PatFrag <
(shl $src0, $src1)
>;
+def add_ctpop : PatFrag <
+ (ops node:$src0, node:$src1),
+ (add (ctpop $src0), $src1)
+>;
+
multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
- SDTypeProfile tc = SDTAtomic2> {
+ SDTypeProfile tc = SDTAtomic2,
+ bit IsInt = 1> {
def _glue : SDNode <
!if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;
- def _local_m0 : local_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
- def _region_m0 : region_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
+ let AddressSpaces = StoreAddress_local.AddrSpaces in {
+ defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
+ }
+
+ let AddressSpaces = StoreAddress_region.AddrSpaces in {
+ defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
+ }
}
defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
@@ -563,17 +671,9 @@ defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
defm atomic_swap : SIAtomicM0Glue2 <"SWAP">;
-defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32>;
-defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32>;
-defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32>;
-
-def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
->;
-
-def atomic_cmp_swap_local_m0 : AtomicCmpSwapLocal<atomic_cmp_swap_glue>;
-def atomic_cmp_swap_region_m0 : AtomicCmpSwapRegion<atomic_cmp_swap_glue>;
-
+defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
+defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>;
+defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>;
def as_i1imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
@@ -591,6 +691,10 @@ def as_i32imm: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;
+def as_i32timm: SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
def as_i64imm: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
}]>;
@@ -627,9 +731,13 @@ def SIMM16bit : ImmLeaf <i32,
>;
def UIMM16bit : ImmLeaf <i32,
- [{return isUInt<16>(Imm); }]
+ [{return isUInt<16>(Imm);}]
>;
+def i64imm_32bit : ImmLeaf<i64, [{
+ return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
+}]>;
+
class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
return isInlineImmediate(N);
}]>;
@@ -763,6 +871,18 @@ def ExpTgtMatchClass : AsmOperandClass {
let RenderMethod = "printExpTgt";
}
+def SWaitMatchClass : AsmOperandClass {
+ let Name = "SWaitCnt";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "parseSWaitCntOps";
+}
+
+def VReg32OrOffClass : AsmOperandClass {
+ let Name = "VReg32OrOff";
+ let ParserMethod = "parseVReg32OrOff";
+}
+
+let OperandType = "OPERAND_IMMEDIATE" in {
def SendMsgImm : Operand<i32> {
let PrintMethod = "printSendMsg";
let ParserMatchClass = SendMsgMatchClass;
@@ -778,22 +898,11 @@ def EndpgmImm : Operand<i16> {
let ParserMatchClass = EndpgmMatchClass;
}
-def SWaitMatchClass : AsmOperandClass {
- let Name = "SWaitCnt";
- let RenderMethod = "addImmOperands";
- let ParserMethod = "parseSWaitCntOps";
-}
-
-def VReg32OrOffClass : AsmOperandClass {
- let Name = "VReg32OrOff";
- let ParserMethod = "parseVReg32OrOff";
-}
-
def WAIT_FLAG : Operand <i32> {
let ParserMatchClass = SWaitMatchClass;
let PrintMethod = "printWaitFlag";
- let OperandType = "OPERAND_IMMEDIATE";
}
+} // End OperandType = "OPERAND_IMMEDIATE"
include "SIInstrFormats.td"
include "VIInstrFormats.td"
@@ -929,6 +1038,7 @@ def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
+def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
@@ -1317,18 +1427,6 @@ class getVALUDstForVT<ValueType VT> {
VOPDstS64orS32)))); // else VT == i1
}
-// Returns true if VT is floating point.
-class getIsFP<ValueType VT> {
- bit ret = !if(!eq(VT.Value, f16.Value), 1,
- !if(!eq(VT.Value, v2f16.Value), 1,
- !if(!eq(VT.Value, v4f16.Value), 1,
- !if(!eq(VT.Value, f32.Value), 1,
- !if(!eq(VT.Value, v2f32.Value), 1,
- !if(!eq(VT.Value, f64.Value), 1,
- !if(!eq(VT.Value, v2f64.Value), 1,
- 0)))))));
-}
-
// Returns the register class to use for the destination of VOP[12C]
// instructions with SDWA extension
class getSDWADstForVT<ValueType VT> {
@@ -1340,7 +1438,7 @@ class getSDWADstForVT<ValueType VT> {
// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT> {
- bit isFP = getIsFP<VT>.ret;
+ bit isFP = isFloatType<VT>.ret;
RegisterOperand ret =
!if(isFP,
@@ -1373,11 +1471,14 @@ class getVOPSrc0ForVT<ValueType VT> {
// Returns the vreg register class to use for source operand given VT
class getVregSrcForVT<ValueType VT> {
RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
- !if(!eq(VT.Size, 64), VReg_64, VGPR_32));
+ !if(!eq(VT.Size, 96), VReg_96,
+ !if(!eq(VT.Size, 64), VReg_64,
+ !if(!eq(VT.Size, 48), VReg_64,
+ VGPR_32))));
}
class getSDWASrcForVT <ValueType VT> {
- bit isFP = getIsFP<VT>.ret;
+ bit isFP = isFloatType<VT>.ret;
RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
RegisterOperand ret = !if(isFP, retFlt, retInt);
@@ -1386,7 +1487,7 @@ class getSDWASrcForVT <ValueType VT> {
// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
class getVOP3SrcForVT<ValueType VT> {
- bit isFP = getIsFP<VT>.ret;
+ bit isFP = isFloatType<VT>.ret;
RegisterOperand ret =
!if(!eq(VT.Size, 128),
VSrc_128,
@@ -1433,7 +1534,7 @@ class isModifierType<ValueType SrcVT> {
// Return type of input modifiers operand for specified input operand
class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
- bit isFP = getIsFP<VT>.ret;
+ bit isFP = isFloatType<VT>.ret;
bit isPacked = isPackedType<VT>.ret;
Operand ret = !if(!eq(VT.Size, 64),
!if(isFP, FP64InputMods, Int64InputMods),
@@ -1452,7 +1553,7 @@ class getOpSelMod <ValueType VT> {
// Return type of input modifiers operand specified input operand for DPP
class getSrcModExt <ValueType VT> {
- bit isFP = getIsFP<VT>.ret;
+ bit isFP = isFloatType<VT>.ret;
Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
}
@@ -2038,6 +2139,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field int NeedPatGen = PatGenMode.NoPattern;
field bit IsMAI = 0;
+ field bit IsDOT = 0;
field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);