diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-10-23 17:51:42 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-10-23 17:51:42 +0000 |
commit | 1d5ae1026e831016fc29fd927877c86af904481f (patch) | |
tree | 2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /lib/Target/AMDGPU/SIInstrInfo.td | |
parent | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff) | |
download | src-1d5ae1026e831016fc29fd927877c86af904481f.tar.gz src-1d5ae1026e831016fc29fd927877c86af904481f.zip |
Vendor import of stripped llvm trunk r375505, the last commit before the [tags: vendor/llvm/llvm-trunk-r375505, vendor/llvm]
upstream Subversion repository was made read-only, and the LLVM project
migrated to GitHub:
https://llvm.org/svn/llvm-project/llvm/trunk@375505
Notes:
svn path=/vendor/llvm/dist/; revision=353940
svn path=/vendor/llvm/llvm-r375505/; revision=353941; tag=vendor/llvm/llvm-trunk-r375505
Diffstat (limited to 'lib/Target/AMDGPU/SIInstrInfo.td')
-rw-r--r-- | lib/Target/AMDGPU/SIInstrInfo.td | 320 |
1 files changed, 211 insertions, 109 deletions
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index c382c816e0b4..1eecbf555613 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -84,7 +84,7 @@ def SDTtbuffer_load : SDTypeProfile<1, 8, SDTCisVT<4, i32>, // soffset(SGPR) SDTCisVT<5, i32>, // offset(imm) SDTCisVT<6, i32>, // format(imm) - SDTCisVT<7, i32>, // cachecontrol(imm) + SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm) SDTCisVT<8, i1> // idxen(imm) ]>; @@ -102,7 +102,7 @@ def SDTtbuffer_store : SDTypeProfile<0, 9, SDTCisVT<4, i32>, // soffset(SGPR) SDTCisVT<5, i32>, // offset(imm) SDTCisVT<6, i32>, // format(imm) - SDTCisVT<7, i32>, // cachecontrol(imm) + SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm) SDTCisVT<8, i1> // idxen(imm) ]>; @@ -119,7 +119,7 @@ def SDTBufferLoad : SDTypeProfile<1, 7, SDTCisVT<3, i32>, // voffset(VGPR) SDTCisVT<4, i32>, // soffset(SGPR) SDTCisVT<5, i32>, // offset(imm) - SDTCisVT<6, i32>, // cachepolicy(imm) + SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm) SDTCisVT<7, i1>]>; // idxen(imm) def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad, @@ -145,7 +145,7 @@ def SDTBufferStore : SDTypeProfile<0, 8, SDTCisVT<3, i32>, // voffset(VGPR) SDTCisVT<4, i32>, // soffset(SGPR) SDTCisVT<5, i32>, // offset(imm) - SDTCisVT<6, i32>, // cachepolicy(imm) + SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm) SDTCisVT<7, i1>]>; // idxen(imm) def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore, @@ -198,6 +198,8 @@ def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">; def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">; def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">; def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">; +def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">; +def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">; def 
SIbuffer_atomic_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_FADD", f32>; def SIbuffer_atomic_pk_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_PK_FADD", v2f16>; @@ -264,6 +266,11 @@ def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8", [SDNPMayLoad, SDNPMemOperand, SDNPHasChain] >; +def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE", + SDTypeProfile<0 ,1, [SDTCisInt<0>]>, + [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue] +>; + //===----------------------------------------------------------------------===// // ValueType helpers //===----------------------------------------------------------------------===// @@ -277,7 +284,9 @@ class isFloatType<ValueType SrcVT> { !if(!eq(SrcVT.Value, f64.Value), 1, !if(!eq(SrcVT.Value, v2f16.Value), 1, !if(!eq(SrcVT.Value, v4f16.Value), 1, - 0))))); + !if(!eq(SrcVT.Value, v2f32.Value), 1, + !if(!eq(SrcVT.Value, v2f64.Value), 1, + 0))))))); } class isIntType<ValueType SrcVT> { @@ -300,14 +309,36 @@ class isPackedType<ValueType SrcVT> { // PatFrags for global memory operations //===----------------------------------------------------------------------===// -defm atomic_inc_global : global_binary_atomic_op<SIatomic_inc>; -defm atomic_dec_global : global_binary_atomic_op<SIatomic_dec>; +foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in { +let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in { + -def atomic_inc_local : local_binary_atomic_op<SIatomic_inc>; -def atomic_dec_local : local_binary_atomic_op<SIatomic_dec>; -def atomic_load_fadd_local : local_binary_atomic_op<atomic_load_fadd>; -def atomic_load_fmin_local : local_binary_atomic_op<SIatomic_fmin>; -def atomic_load_fmax_local : local_binary_atomic_op<SIatomic_fmax>; +defm atomic_inc_#as : binary_atomic_op<SIatomic_inc>; +defm atomic_dec_#as : binary_atomic_op<SIatomic_dec>; +defm atomic_load_fmin_#as : binary_atomic_op<SIatomic_fmin, 0>; +defm atomic_load_fmax_#as : 
binary_atomic_op<SIatomic_fmax, 0>; + + +} // End let AddressSpaces = ... +} // End foreach AddrSpace + +def atomic_fadd_global_noret : PatFrag< + (ops node:$ptr, node:$value), + (SIglobal_atomic_fadd node:$ptr, node:$value)> { + // FIXME: Move this + let MemoryVT = f32; + let IsAtomic = 1; + let AddressSpaces = StoreAddress_global.AddrSpaces; +} + +def atomic_pk_fadd_global_noret : PatFrag< + (ops node:$ptr, node:$value), + (SIglobal_atomic_pk_fadd node:$ptr, node:$value)> { + // FIXME: Move this + let MemoryVT = v2f16; + let IsAtomic = 1; + let AddressSpaces = StoreAddress_global.AddrSpaces; +} //===----------------------------------------------------------------------===// // SDNodes PatFrags for loads/stores with a glue input. @@ -328,10 +359,12 @@ def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad, >; def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> { + let IsLoad = 1; let IsUnindexed = 1; } def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> { + let IsLoad = 1; let IsNonExtLoad = 1; } @@ -347,14 +380,15 @@ def atomic_load_64_glue : PatFrag<(ops node:$ptr), let MemoryVT = i64; } -def extload_glue : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> { +def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> { let IsLoad = 1; let IsAnyExtLoad = 1; } -def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr), [{ - return cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD; -}]>; +def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> { + let IsLoad = 1; + let IsSignExtLoad = 1; +} def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> { let IsLoad = 1; @@ -391,25 +425,50 @@ def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> { let MemoryVT = i16; } -def load_glue_align8 : Aligned8Bytes < - (ops node:$ptr), (load_glue node:$ptr) ->; -def load_glue_align16 : Aligned16Bytes < - (ops 
node:$ptr), (load_glue node:$ptr) ->; +let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in { +def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> { + let IsNonExtLoad = 1; +} -def load_local_m0 : LoadFrag<load_glue>, LocalAddress; -def sextloadi8_local_m0 : LoadFrag<sextloadi8_glue>, LocalAddress; -def sextloadi16_local_m0 : LoadFrag<sextloadi16_glue>, LocalAddress; -def extloadi8_local_m0 : LoadFrag<extloadi8_glue>, LocalAddress; -def zextloadi8_local_m0 : LoadFrag<zextloadi8_glue>, LocalAddress; -def extloadi16_local_m0 : LoadFrag<extloadi16_glue>, LocalAddress; -def zextloadi16_local_m0 : LoadFrag<zextloadi16_glue>, LocalAddress; -def load_align8_local_m0 : LoadFrag <load_glue_align8>, LocalAddress; -def load_align16_local_m0 : LoadFrag <load_glue_align16>, LocalAddress; -def atomic_load_32_local_m0 : LoadFrag<atomic_load_32_glue>, LocalAddress; -def atomic_load_64_local_m0 : LoadFrag<atomic_load_64_glue>, LocalAddress; +let MemoryVT = i8 in { +def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>; +def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>; +def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>; +} + +let MemoryVT = i16 in { +def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>; +def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>; +def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>; +} + +def load_align8_local_m0 : PatFrag<(ops node:$ptr), + (load_local_m0 node:$ptr)> { + let IsLoad = 1; + let IsNonExtLoad = 1; + let MinAlignment = 8; +} +def load_align16_local_m0 : PatFrag<(ops node:$ptr), + (load_local_m0 node:$ptr)> { + let IsLoad = 1; + let IsNonExtLoad = 1; + let MinAlignment = 16; +} + +} // End IsLoad = 1 + +let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in { +def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr), + (atomic_load_32_glue 
node:$ptr)> { + let MemoryVT = i32; +} +def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr), + (atomic_load_64_glue node:$ptr)> { + let MemoryVT = i64; +} + +} // End let AddressSpaces = LoadAddress_local.AddrSpaces def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore, @@ -420,50 +479,88 @@ def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue] >; -def atomic_store_glue : PatFrag<(ops node:$ptr, node:$val), - (AMDGPUatomic_st_glue node:$ptr, node:$val)> { -} - def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr), - (AMDGPUst_glue node:$val, node:$ptr), [{ - return cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; -}]>; + (AMDGPUst_glue node:$val, node:$ptr)> { + let IsStore = 1; + let IsUnindexed = 1; +} def store_glue : PatFrag<(ops node:$val, node:$ptr), - (unindexedstore_glue node:$val, node:$ptr), [{ - return !cast<StoreSDNode>(N)->isTruncatingStore(); -}]>; + (unindexedstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 0; +} def truncstore_glue : PatFrag<(ops node:$val, node:$ptr), - (unindexedstore_glue node:$val, node:$ptr), [{ - return cast<StoreSDNode>(N)->isTruncatingStore(); -}]>; + (unindexedstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 1; +} def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr), - (truncstore_glue node:$val, node:$ptr), [{ - return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i8; -}]>; + (truncstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i8; +} def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr), - (truncstore_glue node:$val, node:$ptr), [{ - return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i16; -}]>; + (truncstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i16; +} -def store_glue_align8 : Aligned8Bytes < - (ops node:$value, node:$ptr), (store_glue node:$value, node:$ptr) ->; +let IsStore = 1, AddressSpaces = 
StoreAddress_local.AddrSpaces in { +def store_local_m0 : PatFrag<(ops node:$val, node:$ptr), + (store_glue node:$val, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 0; +} -def store_glue_align16 : Aligned16Bytes < - (ops node:$value, node:$ptr), (store_glue node:$value, node:$ptr) ->; +def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr), + (unindexedstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i8; +} + +def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr), + (unindexedstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i16; +} +} + +def store_align16_local_m0 : PatFrag < + (ops node:$value, node:$ptr), + (store_local_m0 node:$value, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 0; + let MinAlignment = 16; +} -def store_local_m0 : StoreFrag<store_glue>, LocalAddress; -def truncstorei8_local_m0 : StoreFrag<truncstorei8_glue>, LocalAddress; -def truncstorei16_local_m0 : StoreFrag<truncstorei16_glue>, LocalAddress; -def atomic_store_local_m0 : StoreFrag<AMDGPUatomic_st_glue>, LocalAddress; +def store_align8_local_m0 : PatFrag < + (ops node:$value, node:$ptr), + (store_local_m0 node:$value, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 0; + let MinAlignment = 8; +} + +let AddressSpaces = StoreAddress_local.AddrSpaces in { + +def atomic_store_local_32_m0 : PatFrag < + (ops node:$value, node:$ptr), + (AMDGPUatomic_st_glue node:$value, node:$ptr)> { + let IsAtomic = 1; + let MemoryVT = i32; +} +def atomic_store_local_64_m0 : PatFrag < + (ops node:$value, node:$ptr), + (AMDGPUatomic_st_glue node:$value, node:$ptr)> { + let IsAtomic = 1; + let MemoryVT = i64; +} +} // End let AddressSpaces = StoreAddress_local.AddrSpaces -def store_align8_local_m0 : StoreFrag<store_glue_align8>, LocalAddress; -def store_align16_local_m0 : StoreFrag<store_glue_align16>, LocalAddress; def si_setcc_uniform : PatFrag < (ops node:$lhs, node:$rhs, node:$cond), @@ -539,16 +636,27 @@ def lshl_rev : PatFrag < 
(shl $src0, $src1) >; +def add_ctpop : PatFrag < + (ops node:$src0, node:$src1), + (add (ctpop $src0), $src1) +>; + multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0, - SDTypeProfile tc = SDTAtomic2> { + SDTypeProfile tc = SDTAtomic2, + bit IsInt = 1> { def _glue : SDNode < !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue] >; - def _local_m0 : local_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>; - def _region_m0 : region_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>; + let AddressSpaces = StoreAddress_local.AddrSpaces in { + defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>; + } + + let AddressSpaces = StoreAddress_region.AddrSpaces in { + defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>; + } } defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">; @@ -563,17 +671,9 @@ defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">; defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">; defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">; defm atomic_swap : SIAtomicM0Glue2 <"SWAP">; -defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32>; -defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32>; -defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32>; - -def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3, - [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue] ->; - -def atomic_cmp_swap_local_m0 : AtomicCmpSwapLocal<atomic_cmp_swap_glue>; -def atomic_cmp_swap_region_m0 : AtomicCmpSwapRegion<atomic_cmp_swap_glue>; - +defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>; +defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>; +defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>; def as_i1imm : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1); @@ 
-591,6 +691,10 @@ def as_i32imm: SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); }]>; +def as_i32timm: SDNodeXForm<timm, [{ + return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); +}]>; + def as_i64imm: SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64); }]>; @@ -627,9 +731,13 @@ def SIMM16bit : ImmLeaf <i32, >; def UIMM16bit : ImmLeaf <i32, - [{return isUInt<16>(Imm); }] + [{return isUInt<16>(Imm);}] >; +def i64imm_32bit : ImmLeaf<i64, [{ + return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm); +}]>; + class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{ return isInlineImmediate(N); }]>; @@ -763,6 +871,18 @@ def ExpTgtMatchClass : AsmOperandClass { let RenderMethod = "printExpTgt"; } +def SWaitMatchClass : AsmOperandClass { + let Name = "SWaitCnt"; + let RenderMethod = "addImmOperands"; + let ParserMethod = "parseSWaitCntOps"; +} + +def VReg32OrOffClass : AsmOperandClass { + let Name = "VReg32OrOff"; + let ParserMethod = "parseVReg32OrOff"; +} + +let OperandType = "OPERAND_IMMEDIATE" in { def SendMsgImm : Operand<i32> { let PrintMethod = "printSendMsg"; let ParserMatchClass = SendMsgMatchClass; @@ -778,22 +898,11 @@ def EndpgmImm : Operand<i16> { let ParserMatchClass = EndpgmMatchClass; } -def SWaitMatchClass : AsmOperandClass { - let Name = "SWaitCnt"; - let RenderMethod = "addImmOperands"; - let ParserMethod = "parseSWaitCntOps"; -} - -def VReg32OrOffClass : AsmOperandClass { - let Name = "VReg32OrOff"; - let ParserMethod = "parseVReg32OrOff"; -} - def WAIT_FLAG : Operand <i32> { let ParserMatchClass = SWaitMatchClass; let PrintMethod = "printWaitFlag"; - let OperandType = "OPERAND_IMMEDIATE"; } +} // End OperandType = "OPERAND_IMMEDIATE" include "SIInstrFormats.td" include "VIInstrFormats.td" @@ -929,6 +1038,7 @@ def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>; def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>; def SLC : 
NamedOperandBit<"SLC", NamedMatchClass<"SLC">>; def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>; +def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>; def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>; def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>; def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>; @@ -1317,18 +1427,6 @@ class getVALUDstForVT<ValueType VT> { VOPDstS64orS32)))); // else VT == i1 } -// Returns true if VT is floating point. -class getIsFP<ValueType VT> { - bit ret = !if(!eq(VT.Value, f16.Value), 1, - !if(!eq(VT.Value, v2f16.Value), 1, - !if(!eq(VT.Value, v4f16.Value), 1, - !if(!eq(VT.Value, f32.Value), 1, - !if(!eq(VT.Value, v2f32.Value), 1, - !if(!eq(VT.Value, f64.Value), 1, - !if(!eq(VT.Value, v2f64.Value), 1, - 0))))))); -} - // Returns the register class to use for the destination of VOP[12C] // instructions with SDWA extension class getSDWADstForVT<ValueType VT> { @@ -1340,7 +1438,7 @@ class getSDWADstForVT<ValueType VT> { // Returns the register class to use for source 0 of VOP[12C] // instructions for the given VT. 
class getVOPSrc0ForVT<ValueType VT> { - bit isFP = getIsFP<VT>.ret; + bit isFP = isFloatType<VT>.ret; RegisterOperand ret = !if(isFP, @@ -1373,11 +1471,14 @@ class getVOPSrc0ForVT<ValueType VT> { // Returns the vreg register class to use for source operand given VT class getVregSrcForVT<ValueType VT> { RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128, - !if(!eq(VT.Size, 64), VReg_64, VGPR_32)); + !if(!eq(VT.Size, 96), VReg_96, + !if(!eq(VT.Size, 64), VReg_64, + !if(!eq(VT.Size, 48), VReg_64, + VGPR_32)))); } class getSDWASrcForVT <ValueType VT> { - bit isFP = getIsFP<VT>.ret; + bit isFP = isFloatType<VT>.ret; RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32); RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32); RegisterOperand ret = !if(isFP, retFlt, retInt); @@ -1386,7 +1487,7 @@ class getSDWASrcForVT <ValueType VT> { // Returns the register class to use for sources of VOP3 instructions for the // given VT. class getVOP3SrcForVT<ValueType VT> { - bit isFP = getIsFP<VT>.ret; + bit isFP = isFloatType<VT>.ret; RegisterOperand ret = !if(!eq(VT.Size, 128), VSrc_128, @@ -1433,7 +1534,7 @@ class isModifierType<ValueType SrcVT> { // Return type of input modifiers operand for specified input operand class getSrcMod <ValueType VT, bit EnableF32SrcMods> { - bit isFP = getIsFP<VT>.ret; + bit isFP = isFloatType<VT>.ret; bit isPacked = isPackedType<VT>.ret; Operand ret = !if(!eq(VT.Size, 64), !if(isFP, FP64InputMods, Int64InputMods), @@ -1452,7 +1553,7 @@ class getOpSelMod <ValueType VT> { // Return type of input modifiers operand specified input operand for DPP class getSrcModExt <ValueType VT> { - bit isFP = getIsFP<VT>.ret; + bit isFP = isFloatType<VT>.ret; Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods); } @@ -2038,6 +2139,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0, field int NeedPatGen = PatGenMode.NoPattern; field bit IsMAI = 0; + field bit IsDOT = 0; field Operand Src0PackedMod = 
!if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods); field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods); |