diff options
Diffstat (limited to 'lib/Target/PowerPC/PPCInstrVSX.td')
-rw-r--r-- | lib/Target/PowerPC/PPCInstrVSX.td | 177 |
1 files changed, 170 insertions, 7 deletions
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index b21b251443eb..9685bac2aebb 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -11,6 +11,21 @@ // //===----------------------------------------------------------------------===// +// *********************************** NOTE *********************************** +// ** For POWER8 Little Endian, the VSX swap optimization relies on knowing ** +// ** which VMX and VSX instructions are lane-sensitive and which are not. ** +// ** A lane-sensitive instruction relies, implicitly or explicitly, on ** +// ** whether lanes are numbered from left to right. An instruction like ** +// ** VADDFP is not lane-sensitive, because each lane of the result vector ** +// ** relies only on the corresponding lane of the source vectors. However, ** +// ** an instruction like VMULESB is lane-sensitive, because "even" and ** +// ** "odd" lanes are different for big-endian and little-endian numbering. ** +// ** ** +// ** When adding new VMX and VSX instructions, please consider whether they ** +// ** are lane-sensitive. If so, they must be added to a switch statement ** +// ** in PPCVSXSwapRemoval::gatherVectorInstructions(). ** +// **************************************************************************** + def PPCRegVSRCAsmOperand : AsmOperandClass { let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber"; } @@ -25,6 +40,13 @@ def vsfrc : RegisterOperand<VSFRC> { let ParserMatchClass = PPCRegVSFRCAsmOperand; } +def PPCRegVSSRCAsmOperand : AsmOperandClass { + let Name = "RegVSSRC"; let PredicateMethod = "isVSRegNumber"; +} +def vssrc : RegisterOperand<VSSRC> { + let ParserMatchClass = PPCRegVSSRCAsmOperand; +} + // Little-endian-specific nodes. def SDT_PPClxvd2x : SDTypeProfile<1, 1, [ SDTCisVT<0, v2f64>, SDTCisPtrTy<1> @@ -41,6 +63,9 @@ def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x, [SDNPHasChain, SDNPMayStore]>; def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>; +def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>; +def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; +def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>; multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, @@ -66,7 +91,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects. let Uses = [RM] in { // Load indexed instructions - let mayLoad = 1, canFoldAsLoad = 1 in { + let mayLoad = 1 in { def LXSDX : XX1Form<31, 588, (outs vsfrc:$XT), (ins memrr:$src), "lxsdx $XT, $src", IIC_LdStLFD, @@ -85,7 +110,7 @@ let Uses = [RM] in { (outs vsrc:$XT), (ins memrr:$src), "lxvw4x $XT, $src", IIC_LdStLFD, [(set v4i32:$XT, (int_ppc_vsx_lxvw4x xoaddr:$src))]>; - } + } // mayLoad // Store indexed instructions let mayStore = 1 in { @@ -97,13 +122,14 @@ let Uses = [RM] in { def STXVD2X : XX1Form<31, 972, (outs), (ins vsrc:$XT, memrr:$dst), "stxvd2x $XT, $dst", IIC_LdStSTFD, - [(int_ppc_vsx_stxvd2x v2f64:$XT, xoaddr:$dst)]>; + [(store v2f64:$XT, xoaddr:$dst)]>; def STXVW4X : XX1Form<31, 908, (outs), (ins vsrc:$XT, memrr:$dst), "stxvw4x $XT, $dst", IIC_LdStSTFD, - [(int_ppc_vsx_stxvw4x v4i32:$XT, xoaddr:$dst)]>; - } + [(store v4i32:$XT, xoaddr:$dst)]>; + + } // mayStore // Add/Mul Instructions let isCommutable = 1 in { @@ -773,6 +799,15 @@ let usesCustomInserter = 1, // Expanded after instruction selection. "#SELECT_VSFRC", [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>; + def SELECT_CC_VSSRC: Pseudo<(outs f4rc:$dst), + (ins crrc:$cond, f4rc:$T, f4rc:$F, + i32imm:$BROPC), "#SELECT_CC_VSSRC", + []>; + def SELECT_VSSRC: Pseudo<(outs f4rc:$dst), + (ins crbitrc:$cond, f4rc:$T, f4rc:$F), + "#SELECT_VSSRC", + [(set f32:$dst, + (select i1:$cond, f32:$T, f32:$F))]>; } // usesCustomInserter } // AddedComplexity @@ -872,6 +907,11 @@ def : Pat<(v2f64 (bitconvert v2i64:$A)), def : Pat<(v2i64 (bitconvert v2f64:$A)), (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v2f64 (bitconvert v1i128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v1i128 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + // sign extension patterns // To extend "in place" from v2i32 to v2i64, we have input data like: // | undef | i32 | undef | i32 | @@ -891,9 +931,11 @@ def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>; def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>; // Stores. -def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; +def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), + (STXVD2X $rS, xoaddr:$dst)>; def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; -def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; +def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), + (STXVW4X $rS, xoaddr:$dst)>; def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; // Permutes. @@ -938,3 +980,124 @@ def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B), } // AddedComplexity } // HasVSX +// The following VSX instructions were introduced in Power ISA 2.07 +/* FIXME: if the operands are v2i64, these patterns will not match. + we should define new patterns or otherwise match the same patterns + when the elements are larger than i32. +*/ +def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">; +def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">; +let Predicates = [HasP8Vector] in { +let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. + let isCommutable = 1 in { + def XXLEQV : XX3Form<60, 186, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxleqv $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (vnot_ppc (xor v4i32:$XA, v4i32:$XB)))]>; + def XXLNAND : XX3Form<60, 178, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxlnand $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA, + v4i32:$XB)))]>; + } // isCommutable + + def XXLORC : XX3Form<60, 170, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxlorc $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>; + + // VSX scalar loads introduced in ISA 2.07 + let mayLoad = 1 in { + def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src), + "lxsspx $XT, $src", IIC_LdStLFD, + [(set f32:$XT, (load xoaddr:$src))]>; + def LXSIWAX : XX1Form<31, 76, (outs vsfrc:$XT), (ins memrr:$src), + "lxsiwax $XT, $src", IIC_LdStLFD, + [(set f64:$XT, (PPClfiwax xoaddr:$src))]>; + def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src), + "lxsiwzx $XT, $src", IIC_LdStLFD, + [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; + } // mayLoad + + // VSX scalar stores introduced in ISA 2.07 + let mayStore = 1 in { + def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst), + "stxsspx $XT, $dst", IIC_LdStSTFD, + [(store f32:$XT, xoaddr:$dst)]>; + def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst), + "stxsiwx $XT, $dst", IIC_LdStSTFD, + [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; + } // mayStore + + def : Pat<(f64 (extloadf32 xoaddr:$src)), + (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>; + def : Pat<(f64 (fextend f32:$src)), + (COPY_TO_REGCLASS $src, VSFRC)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), + (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), + (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)), + (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), + (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), + (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), + (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>; + + // VSX Elementary Scalar FP arithmetic (SP) + let isCommutable = 1 in { + def XSADDSP : XX3Form<60, 0, + (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), + "xsaddsp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fadd f32:$XA, f32:$XB))]>; + def XSMULSP : XX3Form<60, 16, + (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), + "xsmulsp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fmul f32:$XA, f32:$XB))]>; + } // isCommutable + + def XSDIVSP : XX3Form<60, 24, + (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), + "xsdivsp $XT, $XA, $XB", IIC_FPDivS, + [(set f32:$XT, (fdiv f32:$XA, f32:$XB))]>; + def XSRESP : XX2Form<60, 26, + (outs vssrc:$XT), (ins vssrc:$XB), + "xsresp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfre f32:$XB))]>; + def XSSQRTSP : XX2Form<60, 11, + (outs vssrc:$XT), (ins vssrc:$XB), + "xssqrtsp $XT, $XB", IIC_FPSqrtS, + [(set f32:$XT, (fsqrt f32:$XB))]>; + def XSRSQRTESP : XX2Form<60, 10, + (outs vssrc:$XT), (ins vssrc:$XB), + "xsrsqrtesp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfrsqrte f32:$XB))]>; + def XSSUBSP : XX3Form<60, 8, + (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), + "xssubsp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>; +} // AddedComplexity = 400 +} // HasP8Vector + +let Predicates = [HasDirectMove, HasVSX] in { + // VSX direct move instructions + def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT), + "mfvsrd $rA, $XT", IIC_VecGeneral, + [(set i64:$rA, (PPCmfvsr f64:$XT))]>, + Requires<[In64BitMode]>; + def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT), + "mfvsrwz $rA, $XT", IIC_VecGeneral, + [(set i32:$rA, (PPCmfvsr f64:$XT))]>; + def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA), + "mtvsrd $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsra i64:$rA))]>, + Requires<[In64BitMode]>; + def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA), + "mtvsrwa $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsra i32:$rA))]>; + def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA), + "mtvsrwz $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsrz i32:$rA))]>; +} // HasDirectMove, HasVSX |