diff options
Diffstat (limited to 'contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td')
-rw-r--r-- | contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td | 356 |
1 files changed, 322 insertions, 34 deletions
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 9dcc968a1a7a..6ea81a25e41c 100644 --- a/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -135,6 +135,30 @@ def : WriteRes<WriteLEA, [JALU01]>; defm : JWriteResIntPair<WriteShift, JALU01, 1>; +def WriteSHLDrri : SchedWriteRes<[JALU01]> { + let Latency = 3; + let ResourceCycles = [6]; + let NumMicroOps = 6; +} +def: InstRW<[WriteSHLDrri], (instregex "SHLD(16|32|64)rri8")>; +def: InstRW<[WriteSHLDrri], (instregex "SHRD(16|32|64)rri8")>; + +def WriteSHLDrrCL : SchedWriteRes<[JALU01]> { + let Latency = 4; + let ResourceCycles = [8]; + let NumMicroOps = 7; +} +def: InstRW<[WriteSHLDrrCL], (instregex "SHLD(16|32|64)rrCL")>; +def: InstRW<[WriteSHLDrrCL], (instregex "SHRD(16|32|64)rrCL")>; + +def WriteSHLDm : SchedWriteRes<[JLAGU, JALU01]> { + let Latency = 9; + let ResourceCycles = [1, 22]; + let NumMicroOps = 8; +} +def: InstRW<[WriteSHLDm], (instregex "SHLD(16|32|64)mr(i8|CL)")>; +def: InstRW<[WriteSHLDm], (instregex "SHRD(16|32|64)mr(i8|CL)")>; + //////////////////////////////////////////////////////////////////////////////// // Loads, stores, and moves, not folded with other operations. // FIXME: Split x86 and SSE load/store/moves @@ -142,7 +166,10 @@ defm : JWriteResIntPair<WriteShift, JALU01, 1>; def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; } def : WriteRes<WriteStore, [JSAGU]>; -def : WriteRes<WriteMove, [JAny]>; +def : WriteRes<WriteMove, [JALU01]>; + +// Treat misc copies as a move. +def : InstRW<[WriteMove], (instrs COPY)>; //////////////////////////////////////////////////////////////////////////////// // Idioms that clear a register, like xorps %xmm0, %xmm0. @@ -168,6 +195,7 @@ defm : JWriteResIntPair<WriteJump, JALU01, 1>; defm : JWriteResFpuPair<WriteFAdd, JFPU0, 3>; defm : JWriteResFpuPair<WriteFMul, JFPU1, 2>; +defm : JWriteResFpuPair<WriteFMA, JFPU1, 2>; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair<WriteFRcp, JFPU1, 2>; defm : JWriteResFpuPair<WriteFRsqrt, JFPU1, 2>; defm : JWriteResFpuPair<WriteFShuffle, JFPU01, 1>; @@ -199,11 +227,13 @@ defm : JWriteResFpuPair<WriteCvtF2F, JFPU1, 3>; // Float -> Float size conve def : WriteRes<WriteFVarBlend, [JFPU01]> { let Latency = 2; - let ResourceCycles = [2]; + let ResourceCycles = [4]; + let NumMicroOps = 3; } def : WriteRes<WriteFVarBlendLd, [JLAGU, JFPU01]> { let Latency = 7; - let ResourceCycles = [1, 2]; + let ResourceCycles = [1, 4]; + let NumMicroOps = 3; } // Vector integer operations. @@ -217,21 +247,20 @@ defm : JWriteResFpuPair<WriteShuffle256, JFPU01, 1>; def : WriteRes<WriteVarBlend, [JFPU01]> { let Latency = 2; - let ResourceCycles = [2]; + let ResourceCycles = [4]; + let NumMicroOps = 3; } def : WriteRes<WriteVarBlendLd, [JLAGU, JFPU01]> { let Latency = 7; - let ResourceCycles = [1, 2]; + let ResourceCycles = [1, 4]; + let NumMicroOps = 3; } // FIXME: why do we need to define AVX2 resource on CPU that doesn't have AVX2? -def : WriteRes<WriteVarVecShift, [JFPU01]> { - let Latency = 1; - let ResourceCycles = [1]; -} +def : WriteRes<WriteVarVecShift, [JFPU01]> {} def : WriteRes<WriteVarVecShiftLd, [JLAGU, JFPU01]> { let Latency = 6; - let ResourceCycles = [1, 1]; + let ResourceCycles = [1, 2]; } def : WriteRes<WriteMPSAD, [JFPU0]> { @@ -249,43 +278,49 @@ def : WriteRes<WriteMPSADLd, [JLAGU, JFPU0]> { // FIXME: approximate latencies + pipe dependencies //////////////////////////////////////////////////////////////////////////////// -def : WriteRes<WritePCmpIStrM, [JFPU01]> { - let Latency = 7; - let ResourceCycles = [2]; +def : WriteRes<WritePCmpIStrM, [JFPU1,JFPU0]> { + let Latency = 8; + let ResourceCycles = [2, 2]; + let NumMicroOps = 3; } -def : WriteRes<WritePCmpIStrMLd, [JLAGU, JFPU01]> { - let Latency = 12; - let ResourceCycles = [1, 2]; +def : WriteRes<WritePCmpIStrMLd, [JLAGU, JFPU1, JFPU0]> { + let Latency = 13; + let ResourceCycles = [1, 2, 2]; + let NumMicroOps = 3; } // Packed Compare Explicit Length Strings, Return Mask -def : WriteRes<WritePCmpEStrM, [JFPU01]> { - let Latency = 13; - let ResourceCycles = [5]; +def : WriteRes<WritePCmpEStrM, [JFPU1, JLAGU, JFPU01,JFPU1, JFPU0]> { + let Latency = 14; + let ResourceCycles = [5, 5, 5, 5, 5]; + let NumMicroOps = 9; } -def : WriteRes<WritePCmpEStrMLd, [JLAGU, JFPU01]> { - let Latency = 18; - let ResourceCycles = [1, 5]; +def : WriteRes<WritePCmpEStrMLd, [JLAGU, JFPU1, JLAGU, JFPU01,JFPU1, JFPU0]> { + let Latency = 19; + let ResourceCycles = [1, 5, 5, 5, 5, 5]; + let NumMicroOps = 9; } // Packed Compare Implicit Length Strings, Return Index -def : WriteRes<WritePCmpIStrI, [JFPU01]> { - let Latency = 6; - let ResourceCycles = [2]; +def : WriteRes<WritePCmpIStrI, [JFPU1, JFPU0]> { + let Latency = 7; + let ResourceCycles = [2, 2]; } -def : WriteRes<WritePCmpIStrILd, [JLAGU, JFPU01]> { - let Latency = 11; - let ResourceCycles = [1, 2]; +def : WriteRes<WritePCmpIStrILd, [JLAGU, JFPU1, JFPU0]> { + let Latency = 12; + let ResourceCycles = [1, 2, 2]; } // Packed Compare Explicit Length Strings, Return Index -def : WriteRes<WritePCmpEStrI, [JFPU01]> { - let Latency = 13; - let ResourceCycles = [5]; +def : WriteRes<WritePCmpEStrI, [JFPU1, JLAGU, JFPU01,JFPU1, JFPU0]> { + let Latency = 14; + let ResourceCycles = [5, 5, 5, 5, 5]; + let NumMicroOps = 9; } -def : WriteRes<WritePCmpEStrILd, [JLAGU, JFPU01]> { - let Latency = 18; - let ResourceCycles = [1, 5]; +def : WriteRes<WritePCmpEStrILd, [JLAGU, JFPU1, JLAGU, JFPU01,JFPU1, JFPU0]> { + let Latency = 19; + let ResourceCycles = [1, 5, 5, 5, 5, 5]; + let NumMicroOps = 9; } //////////////////////////////////////////////////////////////////////////////// @@ -371,6 +406,38 @@ def : WriteRes<WriteFence, [JSAGU]>; def : WriteRes<WriteNop, []>; //////////////////////////////////////////////////////////////////////////////// +// SSE4.1 instructions. +//////////////////////////////////////////////////////////////////////////////// + +def WriteDPPS: SchedWriteRes<[JFPU0, JFPU1]> { + let Latency = 11; + let ResourceCycles = [3,3]; + let NumMicroOps = 5; +} +def : InstRW<[WriteDPPS], (instregex "(V)?DPPSrri")>; + +def WriteDPPSLd: SchedWriteRes<[JLAGU, JFPU0, JFPU1]> { + let Latency = 16; + let ResourceCycles = [1,3,3]; + let NumMicroOps = 6; +} +def : InstRW<[WriteDPPSLd], (instregex "(V)?DPPSrmi")>; + +def WriteDPPD: SchedWriteRes<[JFPU0, JFPU1]> { + let Latency = 9; + let ResourceCycles = [3,3]; + let NumMicroOps = 3; +} +def : InstRW<[WriteDPPD], (instregex "(V)?DPPDrri")>; + +def WriteDPPDLd: SchedWriteRes<[JLAGU, JFPU0, JFPU1]> { + let Latency = 14; + let ResourceCycles = [1,3,3]; + let NumMicroOps = 3; +} +def : InstRW<[WriteDPPDLd], (instregex "(V)?DPPDrmi")>; + +//////////////////////////////////////////////////////////////////////////////// // SSE4A instructions. //////////////////////////////////////////////////////////////////////////////// @@ -387,9 +454,73 @@ def WriteINSERTQ: SchedWriteRes<[JFPU01]> { def : InstRW<[WriteINSERTQ], (instregex "INSERTQ")>; //////////////////////////////////////////////////////////////////////////////// +// F16C instructions. +//////////////////////////////////////////////////////////////////////////////// + +def WriteCVT3: SchedWriteRes<[JFPU1]> { + let Latency = 3; +} +def : InstRW<[WriteCVT3], (instregex "VCVTPS2PHrr")>; +def : InstRW<[WriteCVT3], (instregex "VCVTPH2PSrr")>; + +def WriteCVT3St: SchedWriteRes<[JFPU1, JSAGU]> { + let Latency = 3; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteCVT3St], (instregex "VCVTPS2PHmr")>; + +def WriteCVT3Ld: SchedWriteRes<[JLAGU, JFPU1]> { + let Latency = 8; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteCVT3Ld], (instregex "VCVTPH2PSrm")>; + +def WriteCVTPS2PHY: SchedWriteRes<[JFPU1, JFPU01]> { + let Latency = 6; + let ResourceCycles = [2,2]; + let NumMicroOps = 3; +} +def : InstRW<[WriteCVTPS2PHY], (instregex "VCVTPS2PHYrr")>; + +def WriteCVTPS2PHYSt: SchedWriteRes<[JFPU1, JFPU01, JSAGU]> { + let Latency = 11; + let ResourceCycles = [2,2,1]; + let NumMicroOps = 3; +} +def : InstRW<[WriteCVTPS2PHYSt], (instregex "VCVTPS2PHYmr")>; + +def WriteCVTPH2PSY: SchedWriteRes<[JFPU1]> { + let Latency = 3; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} +def : InstRW<[WriteCVTPH2PSY], (instregex "VCVTPH2PSYrr")>; + +def WriteCVTPH2PSYLd: SchedWriteRes<[JLAGU, JFPU1]> { + let Latency = 8; + let ResourceCycles = [1,2]; + let NumMicroOps = 2; +} +def : InstRW<[WriteCVTPH2PSYLd], (instregex "VCVTPH2PSYrm")>; + +//////////////////////////////////////////////////////////////////////////////// // AVX instructions. //////////////////////////////////////////////////////////////////////////////// +def WriteVDPPSY: SchedWriteRes<[JFPU1, JFPU0]> { + let Latency = 12; + let ResourceCycles = [6, 6]; + let NumMicroOps = 10; +} +def : InstRW<[WriteVDPPSY], (instregex "VDPPSYrr")>; + +def WriteVDPPSYLd: SchedWriteRes<[JLAGU, JFPU1, JFPU0]> { + let Latency = 17; + let ResourceCycles = [1, 6, 6]; + let NumMicroOps = 11; +} +def : InstRW<[WriteVDPPSYLd, ReadAfterLd], (instregex "VDPPSYrm")>; + def WriteFAddY: SchedWriteRes<[JFPU0]> { let Latency = 3; let ResourceCycles = [2]; @@ -438,6 +569,152 @@ def WriteVMULYPSLd: SchedWriteRes<[JLAGU, JFPU1]> { } def : InstRW<[WriteVMULYPSLd, ReadAfterLd], (instregex "VMULPSYrm", "VRCPPSYm", "VRSQRTPSYm")>; +def WriteVCVTY: SchedWriteRes<[JSTC]> { + let Latency = 3; + let ResourceCycles = [2]; +} +def : InstRW<[WriteVCVTY], (instregex "VCVTDQ2P(S|D)Yrr")>; +def : InstRW<[WriteVCVTY], (instregex "VROUNDYP(S|D)r")>; +def : InstRW<[WriteVCVTY], (instregex "VCVTPS2DQYrr")>; +def : InstRW<[WriteVCVTY], (instregex "VCVTTPS2DQYrr")>; + +def WriteVCVTYLd: SchedWriteRes<[JLAGU, JSTC]> { + let Latency = 8; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VCVTDQ2P(S|D)Yrm")>; +def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VROUNDYP(S|D)m")>; +def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VCVTPS2DQYrm")>; +def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VCVTTPS2DQYrm")>; + +def WriteVMONTPSt: SchedWriteRes<[JSTC, JLAGU]> { + let Latency = 3; + let ResourceCycles = [2,1]; +} +def : InstRW<[WriteVMONTPSt], (instregex "VMOVNTP(S|D)Ymr")>; +def : InstRW<[WriteVMONTPSt], (instregex "VMOVNTDQYmr")>; + +def WriteVCVTPDY: SchedWriteRes<[JSTC, JFPU01]> { + let Latency = 6; + let ResourceCycles = [2, 4]; +} +def : InstRW<[WriteVCVTPDY], (instregex "VCVTPD2(DQ|PS)Yrr")>; +def : InstRW<[WriteVCVTPDY], (instregex "VCVTTPD2DQYrr")>; + +def WriteVCVTPDYLd: SchedWriteRes<[JLAGU, JSTC, JFPU01]> { + let Latency = 11; + let ResourceCycles = [1, 2, 4]; +} +def : InstRW<[WriteVCVTPDYLd, ReadAfterLd], (instregex "VCVTPD2(DQ|PS)Yrm")>; +def : InstRW<[WriteVCVTPDYLd, ReadAfterLd], (instregex "VCVTTPD2DQYrm")>; + +def WriteVBlendVPY: SchedWriteRes<[JFPU01]> { + let Latency = 3; + let ResourceCycles = [6]; +} +def : InstRW<[WriteVBlendVPY], (instregex "VBLENDVP(S|D)Yrr", "VPERMILP(D|S)Yrr")>; + +def WriteVBlendVPYLd: SchedWriteRes<[JLAGU, JFPU01]> { + let Latency = 8; + let ResourceCycles = [1, 6]; +} +def : InstRW<[WriteVBlendVPYLd, ReadAfterLd], (instregex "VBLENDVP(S|D)Yrm")>; + +def WriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01]> { + let Latency = 6; + let ResourceCycles = [1, 4]; +} +def : InstRW<[WriteVBROADCASTYLd, ReadAfterLd], (instregex "VBROADCASTS(S|D)Yrm")>; + +def WriteFPAY22: SchedWriteRes<[JFPU0]> { + let Latency = 2; + let ResourceCycles = [2]; +} +def : InstRW<[WriteFPAY22], (instregex "VCMPP(S|D)Yrri", "VM(AX|IN)P(D|S)Yrr")>; + +def WriteFPAY22Ld: SchedWriteRes<[JLAGU, JFPU0]> { + let Latency = 7; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteFPAY22Ld, ReadAfterLd], (instregex "VCMPP(S|D)Yrmi", "VM(AX|IN)P(D|S)Yrm")>; + +def WriteVHAddSubY: SchedWriteRes<[JFPU0]> { + let Latency = 3; + let ResourceCycles = [2]; +} +def : InstRW<[WriteVHAddSubY], (instregex "VH(ADD|SUB)P(D|S)Yrr")>; + +def WriteVHAddSubYLd: SchedWriteRes<[JLAGU, JFPU0]> { + let Latency = 8; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteVHAddSubYLd], (instregex "VH(ADD|SUB)P(D|S)Yrm")>; + +def WriteVMaskMovLd: SchedWriteRes<[JLAGU,JFPU01]> { + let Latency = 6; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteVMaskMovLd], (instregex "VMASKMOVP(D|S)rm")>; + +def WriteVMaskMovYLd: SchedWriteRes<[JLAGU,JFPU01]> { + let Latency = 6; + let ResourceCycles = [1, 4]; +} +def : InstRW<[WriteVMaskMovYLd], (instregex "VMASKMOVP(D|S)Yrm")>; + +def WriteVMaskMovSt: SchedWriteRes<[JFPU01,JSAGU]> { + let Latency = 6; + let ResourceCycles = [4, 1]; +} +def : InstRW<[WriteVMaskMovSt], (instregex "VMASKMOVP(D|S)mr")>; + +def WriteVMaskMovYSt: SchedWriteRes<[JFPU01,JSAGU]> { + let Latency = 6; + let ResourceCycles = [4, 1]; +} +def : InstRW<[WriteVMaskMovYSt], (instregex "VMASKMOVP(D|S)Ymr")>; + +// TODO: In fact we have latency '2+i'. The +i represents an additional 1 cycle transfer +// operation which moves the floating point result to the integer unit. During this +// additional cycle the floating point unit execution resources are not occupied +// and ALU0 in the integer unit is occupied instead. +def WriteVMOVMSK: SchedWriteRes<[JFPU0]> { + let Latency = 3; +} +def : InstRW<[WriteVMOVMSK], (instregex "VMOVMSKP(D|S)(Y)?rr")>; + +// TODO: In fact we have latency '3+i'. The +i represents an additional 1 cycle transfer +// operation which moves the floating point result to the integer unit. During this +// additional cycle the floating point unit execution resources are not occupied +// and ALU0 in the integer unit is occupied instead. +def WriteVTESTY: SchedWriteRes<[JFPU01, JFPU0]> { + let Latency = 4; + let ResourceCycles = [2, 2]; + let NumMicroOps = 3; +} +def : InstRW<[WriteVTESTY], (instregex "VTESTP(S|D)Yrr")>; +def : InstRW<[WriteVTESTY], (instregex "VPTESTYrr")>; + +def WriteVTESTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPU0]> { + let Latency = 9; + let ResourceCycles = [1, 2, 2]; + let NumMicroOps = 3; +} +def : InstRW<[WriteVTESTYLd], (instregex "VTESTP(S|D)Yrm")>; +def : InstRW<[WriteVTESTYLd], (instregex "VPTESTYrm")>; + +def WriteVTEST: SchedWriteRes<[JFPU0]> { + let Latency = 3; +} +def : InstRW<[WriteVTEST], (instregex "VTESTP(S|D)rr")>; +def : InstRW<[WriteVTEST], (instregex "VPTESTrr")>; + +def WriteVTESTLd: SchedWriteRes<[JLAGU, JFPU0]> { + let Latency = 8; +} +def : InstRW<[WriteVTESTLd], (instregex "VTESTP(S|D)rm")>; +def : InstRW<[WriteVTESTLd], (instregex "VPTESTrm")>; + def WriteVSQRTYPD: SchedWriteRes<[JFPU1]> { let Latency = 54; let ResourceCycles = [54]; @@ -462,5 +739,16 @@ def WriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1]> { } def : InstRW<[WriteVSQRTYPSLd], (instregex "VSQRTPSYm")>; +def WriteJVZEROALL: SchedWriteRes<[]> { + let Latency = 90; + let NumMicroOps = 73; +} +def : InstRW<[WriteJVZEROALL], (instregex "VZEROALL")>; + +def WriteJVZEROUPPER: SchedWriteRes<[]> { + let Latency = 46; + let NumMicroOps = 37; +} +def : InstRW<[WriteJVZEROUPPER], (instregex "VZEROUPPER")>; } // SchedModel |