diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUGISel.td')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 114 |
1 files changed, 103 insertions, 11 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index d420aa02ac28..3f12addbcc79 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// include "AMDGPU.td" +include "AMDGPUCombine.td" def sd_vsrc0 : ComplexPattern<i32, 1, "">; def gi_vsrc0 : @@ -30,6 +31,10 @@ def gi_vop3mods : GIComplexOperandMatcher<s32, "selectVOP3Mods">, GIComplexPatternEquiv<VOP3Mods>; +def gi_vop3_no_mods : + GIComplexOperandMatcher<s32, "selectVOP3NoMods">, + GIComplexPatternEquiv<VOP3NoMods>; + def gi_vop3mods_nnan : GIComplexOperandMatcher<s32, "selectVOP3Mods_nnan">, GIComplexPatternEquiv<VOP3Mods_nnan>; @@ -38,9 +43,9 @@ def gi_vop3omods : GIComplexOperandMatcher<s32, "selectVOP3OMods">, GIComplexPatternEquiv<VOP3OMods>; -def gi_vop3opselmods0 : - GIComplexOperandMatcher<s32, "selectVOP3OpSelMods0">, - GIComplexPatternEquiv<VOP3OpSelMods0>; +def gi_vop3pmods : + GIComplexOperandMatcher<s32, "selectVOP3PMods">, + GIComplexPatternEquiv<VOP3PMods>; def gi_vop3opselmods : GIComplexOperandMatcher<s32, "selectVOP3OpSelMods">, @@ -83,6 +88,33 @@ def gi_ds_1addr_1offset : GIComplexOperandMatcher<s32, "selectDS1Addr1Offset">, GIComplexPatternEquiv<DS1Addr1Offset>; +def gi_ds_64bit_4byte_aligned : + GIComplexOperandMatcher<s64, "selectDS64Bit4ByteAligned">, + GIComplexPatternEquiv<DS64Bit4ByteAligned>; + +def gi_mubuf_addr64 : + GIComplexOperandMatcher<s64, "selectMUBUFAddr64">, + GIComplexPatternEquiv<MUBUFAddr64>; + +def gi_mubuf_offset : + GIComplexOperandMatcher<s64, "selectMUBUFOffset">, + GIComplexPatternEquiv<MUBUFOffset>; + +def gi_mubuf_addr64_atomic : + GIComplexOperandMatcher<s64, "selectMUBUFAddr64Atomic">, + GIComplexPatternEquiv<MUBUFAddr64Atomic>; + +def gi_mubuf_offset_atomic : + GIComplexOperandMatcher<s64, "selectMUBUFOffsetAtomic">, + GIComplexPatternEquiv<MUBUFOffsetAtomic>; + +def 
gi_smrd_buffer_imm : + GIComplexOperandMatcher<s64, "selectSMRDBufferImm">, + GIComplexPatternEquiv<SMRDBufferImm>; + +def gi_smrd_buffer_imm32 : + GIComplexOperandMatcher<s64, "selectSMRDBufferImm32">, + GIComplexPatternEquiv<SMRDBufferImm32>; // Separate load nodes are defined to glue m0 initialization in // SelectionDAG. The GISel selector can just insert m0 initialization @@ -116,9 +148,54 @@ def : GINodeEquiv<G_ATOMICRMW_UMIN, atomic_load_umin_glue>; def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax_glue>; def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd_glue>; -def : GINodeEquiv<G_AMDGPU_FFBH_U32, AMDGPUffbh_u32>; -def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>; +def : GINodeEquiv<G_AMDGPU_FFBH_U32, AMDGPUffbh_u32_impl>; +def : GINodeEquiv<G_AMDGPU_FMIN_LEGACY, AMDGPUfmin_legacy>; +def : GINodeEquiv<G_AMDGPU_FMAX_LEGACY, AMDGPUfmax_legacy>; +def : GINodeEquiv<G_AMDGPU_RCP_IFLAG, AMDGPUrcp_iflag>; +def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE0, AMDGPUcvt_f32_ubyte0>; +def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE1, AMDGPUcvt_f32_ubyte1>; +def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE2, AMDGPUcvt_f32_ubyte2>; +def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE3, AMDGPUcvt_f32_ubyte3>; + +def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>; +def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD, SIbuffer_load>; +def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_USHORT, SIbuffer_load_ushort>; +def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_UBYTE, SIbuffer_load_ubyte>; +def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_SSHORT, SIbuffer_load_short>; +def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_SBYTE, SIbuffer_load_byte>; +def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_FORMAT, SIbuffer_load_format>; +def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_FORMAT_D16, SIbuffer_load_format_d16>; +def : GINodeEquiv<G_AMDGPU_TBUFFER_LOAD_FORMAT, SItbuffer_load>; +def : GINodeEquiv<G_AMDGPU_TBUFFER_LOAD_FORMAT_D16, SItbuffer_load_d16>; +def : GINodeEquiv<G_AMDGPU_BUFFER_STORE, SIbuffer_store>; +def : 
GINodeEquiv<G_AMDGPU_BUFFER_STORE_SHORT, SIbuffer_store_short>; +def : GINodeEquiv<G_AMDGPU_BUFFER_STORE_BYTE, SIbuffer_store_byte>; +def : GINodeEquiv<G_AMDGPU_BUFFER_STORE_FORMAT, SIbuffer_store_format>; +def : GINodeEquiv<G_AMDGPU_BUFFER_STORE_FORMAT_D16, SIbuffer_store_format_d16>; +def : GINodeEquiv<G_AMDGPU_TBUFFER_STORE_FORMAT, SItbuffer_store>; +def : GINodeEquiv<G_AMDGPU_TBUFFER_STORE_FORMAT_D16, SItbuffer_store_d16>; + +// FIXME: Check MMO is atomic +def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, SIatomic_inc>; +def : GINodeEquiv<G_AMDGPU_ATOMIC_DEC, SIatomic_dec>; +def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, atomic_inc_glue>; +def : GINodeEquiv<G_AMDGPU_ATOMIC_DEC, atomic_dec_glue>; + +def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_SWAP, SIbuffer_atomic_swap>; +def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_ADD, SIbuffer_atomic_add>; +def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_SUB, SIbuffer_atomic_sub>; +def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_SMIN, SIbuffer_atomic_smin>; +def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_UMIN, SIbuffer_atomic_umin>; +def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_SMAX, SIbuffer_atomic_smax>; +def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_UMAX, SIbuffer_atomic_umax>; +def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_AND, SIbuffer_atomic_and>; +def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_OR, SIbuffer_atomic_or>; +def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_XOR, SIbuffer_atomic_xor>; +def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_INC, SIbuffer_atomic_inc>; +def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_DEC, SIbuffer_atomic_dec>; +def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>; +def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD, SIsbuffer_load>; class GISelSop2Pat < SDPatternOperator node, @@ -188,16 +265,13 @@ multiclass GISelVop2IntrPat < def : GISelVop2Pat <node, inst, dst_vt, src_vt>; - // FIXME: Intrinsics aren't marked as commutable, so we need to add an explcit + // FIXME: Intrinsics aren't marked as commutable, so we need to add an explicit // pattern to handle 
commuting. This is another reason why legalizing to a // generic machine instruction may be better than matching the intrinsic // directly. def : GISelVop2CommutePat <node, inst, dst_vt, src_vt>; } -def : GISelSop2Pat <or, S_OR_B32, i32>; -def : GISelVop2Pat <or, V_OR_B32_e32, i32>; - // Since GlobalISel is more flexible than SelectionDAG, I think we can get // away with adding patterns for integer types and not legalizing all // loads and stores to vector types. This should help simplify the load/store @@ -206,12 +280,18 @@ foreach Ty = [i64, p0, p1, p4] in { defm : SMRD_Pattern <"S_LOAD_DWORDX2", Ty>; } -def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">, +def gi_as_i32timm : GICustomOperandRenderer<"renderTruncTImm32">, GISDNodeXFormEquiv<as_i32timm>; -def gi_as_i16timm : GICustomOperandRenderer<"renderTruncTImm">, +def gi_as_i16timm : GICustomOperandRenderer<"renderTruncTImm16">, GISDNodeXFormEquiv<as_i16timm>; +def gi_as_i8timm : GICustomOperandRenderer<"renderTruncTImm8">, + GISDNodeXFormEquiv<as_i8timm>; + +def gi_as_i1timm : GICustomOperandRenderer<"renderTruncTImm1">, + GISDNodeXFormEquiv<as_i1timm>; + def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">, GISDNodeXFormEquiv<NegateImm>; @@ -220,3 +300,15 @@ def gi_bitcast_fpimm_to_i32 : GICustomOperandRenderer<"renderBitcastImm">, def gi_IMMPopCount : GICustomOperandRenderer<"renderPopcntImm">, GISDNodeXFormEquiv<IMMPopCount>; + +def gi_extract_glc : GICustomOperandRenderer<"renderExtractGLC">, + GISDNodeXFormEquiv<extract_glc>; + +def gi_extract_slc : GICustomOperandRenderer<"renderExtractSLC">, + GISDNodeXFormEquiv<extract_slc>; + +def gi_extract_dlc : GICustomOperandRenderer<"renderExtractDLC">, + GISDNodeXFormEquiv<extract_dlc>; + +def gi_extract_swz : GICustomOperandRenderer<"renderExtractSWZ">, + GISDNodeXFormEquiv<extract_swz>; |