diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/VOP1Instructions.td')
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP1Instructions.td | 139 |
1 file changed, 74 insertions, 65 deletions
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index c7aed0985540..17f334f62a30 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -1,4 +1,4 @@ -//===-- VOP1Instructions.td - Vector Instruction Defintions ---------------===// +//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -48,9 +48,13 @@ class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1On let mayStore = 0; let hasSideEffects = 0; + let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret); + + let mayRaiseFPException = ReadsModeReg; + let VOP1 = 1; let VALU = 1; - let Uses = [EXEC]; + let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]); let AsmVariantName = AMDGPUAsmVariants.Default; } @@ -89,9 +93,7 @@ class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies { list<dag> ret = !if(P.HasModifiers, - [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, - i32:$src0_modifiers, - i1:$clamp, i32:$omod))))], + [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers))))], !if(P.HasOMod, [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0, i1:$clamp, i32:$omod))))], @@ -102,8 +104,13 @@ class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies { multiclass VOP1Inst <string opName, VOPProfile P, SDPatternOperator node = null_frag> { - def _e32 : VOP1_Pseudo <opName, P>; - def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>; + // We only want to set this on the basic, non-SDWA or DPP forms. 
+ defvar should_mov_imm = !eq(opName, "v_mov_b32"); + + let isMoveImm = should_mov_imm in { + def _e32 : VOP1_Pseudo <opName, P>; + def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>; + } foreach _ = BoolToList<P.HasExtSDWA>.ret in def _sdwa : VOP1_SDWA_Pseudo <opName, P>; @@ -146,7 +153,7 @@ let VOPAsmPrefer32Bit = 1 in { defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>; } -let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in { +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>; } // End isMoveImm = 1 @@ -183,31 +190,51 @@ def V_READFIRSTLANE_B32 : let SchedRW = [WriteDoubleCvt] in { defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>; + +let mayRaiseFPException = 0 in { defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>; +} + defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>; defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>; defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>; + +let mayRaiseFPException = 0 in { defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>; +} + } // End SchedRW = [WriteDoubleCvt] -let SchedRW = [WriteQuarterRate32] in { +let SchedRW = [WriteFloatCvt] in { + +// XXX: Does this really not raise exceptions? The manual claims the +// 16-bit ones can. 
+let mayRaiseFPException = 0 in { defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>; defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>; +} + defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>; defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>; let FPDPRounding = 1 in { defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>; } // End FPDPRounding = 1 + defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>; + +let ReadsModeReg = 0, mayRaiseFPException = 0 in { defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>; defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>; defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>; -} // End SchedRW = [WriteQuarterRate32] +} // End ReadsModeReg = 0, mayRaiseFPException = 0 +} // End SchedRW = [WriteFloatCvt] +let ReadsModeReg = 0, mayRaiseFPException = 0 in { defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>; defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>; defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>; defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>; +} // ReadsModeReg = 0, mayRaiseFPException = 0 defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>; defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>; @@ -215,33 +242,30 @@ defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>; defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>; defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>; -let SchedRW = [WriteQuarterRate32] in { +let SchedRW = [WriteTrans32] in { defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>; defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>; defm V_RCP_F32 : 
VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>; defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>; defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>; -defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, fsqrt>; -} // End SchedRW = [WriteQuarterRate32] +defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, any_amdgcn_sqrt>; +} // End SchedRW = [WriteTrans32] -let SchedRW = [WriteDouble] in { +let SchedRW = [WriteTrans64] in { defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>; defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>; -} // End SchedRW = [WriteDouble]; - -let SchedRW = [WriteDouble] in { -defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, fsqrt>; -} // End SchedRW = [WriteDouble] +defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, any_amdgcn_sqrt>; +} // End SchedRW = [WriteTrans64] -let SchedRW = [WriteQuarterRate32] in { +let SchedRW = [WriteTrans32] in { defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>; defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>; -} // End SchedRW = [WriteQuarterRate32] +} // End SchedRW = [WriteTrans32] defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>; defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>; defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>; -defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>; +defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>; defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>; let SchedRW = [WriteDoubleAdd] in { @@ -317,7 +341,7 @@ defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>; defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>; let SubtargetPredicate = isGFX6GFX7 in { - let SchedRW = [WriteQuarterRate32] in { + let SchedRW = [WriteTrans32] in { defm V_LOG_CLAMP_F32 : VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>; defm V_RCP_CLAMP_F32 : @@ -327,8 +351,8 @@ 
let SubtargetPredicate = isGFX6GFX7 in { defm V_RSQ_CLAMP_F32 : VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>; defm V_RSQ_LEGACY_F32 : - VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>; - } // End SchedRW = [WriteQuarterRate32] + VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>; + } // End SchedRW = [WriteTrans32] let SchedRW = [WriteDouble] in { defm V_RCP_CLAMP_F64 : @@ -339,10 +363,10 @@ let SubtargetPredicate = isGFX6GFX7 in { } // End SubtargetPredicate = isGFX6GFX7 let SubtargetPredicate = isGFX7GFX8GFX9 in { - let SchedRW = [WriteQuarterRate32] in { + let SchedRW = [WriteTrans32] in { defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>; defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>; - } // End SchedRW = [WriteQuarterRate32] + } // End SchedRW = [WriteTrans32] } // End SubtargetPredicate = isGFX7GFX8GFX9 let SubtargetPredicate = isGFX7Plus in { @@ -362,15 +386,15 @@ defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>; } // End FPDPRounding = 1 defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>; defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>; -let SchedRW = [WriteQuarterRate32] in { +let SchedRW = [WriteTrans32] in { defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>; -defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, fsqrt>; +defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>; defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>; defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>; defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>; defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>; defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; -} // End SchedRW = [WriteQuarterRate32] +} // End SchedRW = [WriteTrans32] defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>; defm V_FREXP_EXP_I16_F16 : 
VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16, int_amdgcn_frexp_exp>; defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>; @@ -414,8 +438,11 @@ let SubtargetPredicate = isGFX9Plus in { } defm V_SAT_PK_U8_I16 : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>; - defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>; - defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>; + + let mayRaiseFPException = 0 in { + defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>; + defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>; + } // End mayRaiseFPException = 0 } // End SubtargetPredicate = isGFX9Plus let SubtargetPredicate = isGFX9Only in { @@ -458,7 +485,7 @@ class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP1 class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl> : VOP1_DPP<op, ps, p, 1>, SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX10> { - let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst); + let AssemblerPredicate = HasDPP16; let SubtargetPredicate = HasDPP16; } @@ -475,7 +502,7 @@ class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> : let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0); let Inst{31-25} = 0x3f; - let AssemblerPredicate = !if(p.HasExt, HasDPP8, DisableInst); + let AssemblerPredicate = HasDPP8; let SubtargetPredicate = HasDPP8; } @@ -812,42 +839,23 @@ def V_MOV_B32_indirect : VPseudoInstSI<(outs), let SubtargetPredicate = isGFX8GFX9; } -// This is a pseudo variant of the v_movreld_b32 instruction in which the -// vector operand appears only twice, once as def and once as use. Using this -// pseudo avoids problems with the Two Address instructions pass. 
-class V_MOVRELD_B32_pseudo<RegisterClass rc> : VPseudoInstSI < - (outs rc:$vdst), - (ins rc:$vsrc, VSrc_b32:$val, i32imm:$offset)> { - let VOP1 = 1; - - let Constraints = "$vsrc = $vdst"; - let Uses = [M0, EXEC]; - - let SubtargetPredicate = HasMovrel; -} - -def V_MOVRELD_B32_V1 : V_MOVRELD_B32_pseudo<VGPR_32>; -def V_MOVRELD_B32_V2 : V_MOVRELD_B32_pseudo<VReg_64>; -def V_MOVRELD_B32_V4 : V_MOVRELD_B32_pseudo<VReg_128>; -def V_MOVRELD_B32_V8 : V_MOVRELD_B32_pseudo<VReg_256>; -def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>; - let OtherPredicates = [isGFX8Plus] in { def : GCNPat < - (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask, - timm:$bound_ctrl)), - (V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl), - (as_i32imm $row_mask), (as_i32imm $bank_mask), - (as_i1imm $bound_ctrl)) + (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, + timm:$bank_mask, timm:$bound_ctrl)), + (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp_ctrl), + (as_i32timm $row_mask), (as_i32timm $bank_mask), + (as_i1timm $bound_ctrl)) >; def : GCNPat < - (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask, - timm:$bank_mask, timm:$bound_ctrl)), - (V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl), - (as_i32imm $row_mask), (as_i32imm $bank_mask), - (as_i1imm $bound_ctrl)) + (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, + timm:$row_mask, timm:$bank_mask, + timm:$bound_ctrl)), + (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl), + (as_i32timm $row_mask), (as_i32timm $bank_mask), + (as_i1timm $bound_ctrl)) >; } // End OtherPredicates = [isGFX8Plus] @@ -907,6 +915,7 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>; let OtherPredicates = [isGFX10Plus] in { def : GCNPat < (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)), - (V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm $dpp8), (i32 DPP8Mode.FI_0)) + (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src, + (as_i32timm $dpp8), 
(i32 DPP8Mode.FI_0)) >; } // End OtherPredicates = [isGFX10Plus] |