diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/VOP1Instructions.td')
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP1Instructions.td | 93 |
1 files changed, 88 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 95a1d8696347..ef652fce6548 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -571,6 +571,7 @@ let SubtargetPredicate = isGFX9Only in { } // End SubtargetPredicate = isGFX9Only class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> { + let HasExtDPP = 1; let HasExtSDWA = 1; let HasExtSDWA9 = 1; let HasExt = 1; @@ -599,6 +600,7 @@ class Cvt_F32_F8_Pat<SDPatternOperator node, int index, (inst_sdwa 0, $src, 0, 0, index) >; +let SubtargetPredicate = isGFX9Only in { let OtherPredicates = [HasCvtFP8VOP1Bug] in { def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)), (V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>; @@ -617,6 +619,7 @@ foreach Index = [1, 2, 3] in { def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>; def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>; } +} // End SubtargetPredicate = isGFX9Only class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index, VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat< @@ -626,11 +629,77 @@ class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index, (inst_e32 $src)) >; -foreach Index = [0, -1] in { - def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index, - V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>; - def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index, - V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>; +let SubtargetPredicate = isGFX9Only in { + foreach Index = [0, -1] in { + def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index, + V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>; + def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index, + V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>; + } +} + + +// Similar to VOPProfile_Base_CVT_F32_F8, but for VOP3 instructions. +def VOPProfile_Base_CVT_PK_F32_F8_OpSel : VOPProfileI2F <v2f32, i32> { + let HasOpSel = 1; + let HasExtVOP3DPP = 0; +} + +def VOPProfile_Base_CVT_F32_F8_OpSel : VOPProfile<[f32, i32, untyped, untyped]> { + let HasOpSel = 1; + let HasExtDPP = 1; + let HasExtVOP3DPP = 1; + let IsFP8 = 1; + let HasClamp = 0; + let HasOMod = 0; + let HasModifiers = 1; + let Src1VOP3DPP = Src1RC64; +} + +let SubtargetPredicate = isGFX12Plus, mayRaiseFPException = 0, + SchedRW = [WriteFloatCvt] in { + defm V_CVT_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_f32_fp8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>; + defm V_CVT_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_f32_bf8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>; + defm V_CVT_PK_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_fp8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>; + defm V_CVT_PK_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_bf8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>; +} + +class Cvt_F32_F8_Pat_OpSel<SDPatternOperator node, bits<2> index, + VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat< + (f32 (node i32:$src, index)), + !if (index, + (inst_e64 !if(index{0}, + !if(index{1}, !or(SRCMODS.OP_SEL_0, SRCMODS.OP_SEL_1), + SRCMODS.OP_SEL_0), + !if(index{1}, SRCMODS.OP_SEL_1, 0)), + $src, 0), + (inst_e32 $src)) +>; + +let SubtargetPredicate = isGFX12Plus in { + foreach Index = [0, 1, 2, 3] in { + def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_fp8, Index, + V_CVT_F32_FP8_e32, V_CVT_F32_FP8_OP_SEL_e64>; + def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_bf8, Index, + V_CVT_F32_BF8_e32, V_CVT_F32_BF8_OP_SEL_e64>; + } +} + +class Cvt_PK_F32_F8_Pat_OpSel<SDPatternOperator node, int index, + VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat< + (v2f32 (node i32:$src, index)), + !if (index, + (inst_e64 SRCMODS.OP_SEL_0, $src, 0, 0, SRCMODS.NONE), + (inst_e32 $src)) +>; + +let SubtargetPredicate = isGFX12Plus in { + foreach Index = [0, -1] in { + def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_fp8, Index, + V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_OP_SEL_e64>; + def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_bf8, Index, + V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_OP_SEL_e64>; + } } let SubtargetPredicate = isGFX10Plus in { @@ -853,6 +922,20 @@ multiclass VOP1_Real_NO_DPP_OP_SEL_with_name<GFXGen Gen, bits<9> op, VOP3_Real_with_name<Gen, {0, 1, 1, op{6-0}}, opName, asmName>; +// Define VOP1 instructions using the pseudo instruction with its old profile and +// VOP3 using the OpSel profile for the pseudo instruction. +defm V_CVT_F32_FP8 : VOP1_Real_NO_VOP3_with_name_gfx12<0x06c, "V_CVT_F32_FP8", "v_cvt_f32_fp8">; +defm V_CVT_F32_FP8 : VOP1_Realtriple_e64_with_name<GFX12Gen, 0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">; + +defm V_CVT_F32_BF8 : VOP1_Real_NO_VOP3_with_name_gfx12<0x06d, "V_CVT_F32_BF8", "v_cvt_f32_bf8">; +defm V_CVT_F32_BF8 : VOP1_Realtriple_e64_with_name<GFX12Gen, 0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">; + +defm V_CVT_PK_F32_FP8 : VOP1_Real_e32_with_name<GFX12Gen, 0x06e, "V_CVT_PK_F32_FP8", "v_cvt_pk_f32_fp8">; +defm V_CVT_PK_F32_FP8 : VOP3_Real_with_name<GFX12Gen, 0x1ee, "V_CVT_PK_F32_FP8_OP_SEL", "v_cvt_pk_f32_fp8">; + +defm V_CVT_PK_F32_BF8 : VOP1_Real_e32_with_name<GFX12Gen, 0x06f, "V_CVT_PK_F32_BF8", "v_cvt_pk_f32_bf8">; +defm V_CVT_PK_F32_BF8 : VOP3_Real_with_name<GFX12Gen, 0x1ef, "V_CVT_PK_F32_BF8_OP_SEL", "v_cvt_pk_f32_bf8">; + defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00c, "V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">; defm V_CVT_FLOOR_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00d, |