aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td')
-rw-r--r--contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td356
1 files changed, 322 insertions, 34 deletions
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 9dcc968a1a7a..6ea81a25e41c 100644
--- a/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -135,6 +135,30 @@ def : WriteRes<WriteLEA, [JALU01]>;
defm : JWriteResIntPair<WriteShift, JALU01, 1>;
+def WriteSHLDrri : SchedWriteRes<[JALU01]> { // Sched class bound below to the SHLD/SHRD ...rri8 opcodes.
+ let Latency = 3;
+ let ResourceCycles = [6]; // JALU01 is consumed for 6 cycles.
+ let NumMicroOps = 6;
+}
+def: InstRW<[WriteSHLDrri], (instregex "SHLD(16|32|64)rri8")>;
+def: InstRW<[WriteSHLDrri], (instregex "SHRD(16|32|64)rri8")>; // SHRD shares the SHLD class.
+
+def WriteSHLDrrCL : SchedWriteRes<[JALU01]> { // Sched class bound below to the SHLD/SHRD ...rrCL opcodes.
+ let Latency = 4;
+ let ResourceCycles = [8]; // JALU01 is consumed for 8 cycles.
+ let NumMicroOps = 7;
+}
+def: InstRW<[WriteSHLDrrCL], (instregex "SHLD(16|32|64)rrCL")>;
+def: InstRW<[WriteSHLDrrCL], (instregex "SHRD(16|32|64)rrCL")>; // SHRD shares the SHLD class.
+
+def WriteSHLDm : SchedWriteRes<[JLAGU, JALU01]> { // Sched class for the SHLD/SHRD ...mri8 and ...mrCL opcodes.
+ let Latency = 9;
+ let ResourceCycles = [1, 22]; // 1 cycle on JLAGU, 22 on JALU01.
+ let NumMicroOps = 8;
+}
+def: InstRW<[WriteSHLDm], (instregex "SHLD(16|32|64)mr(i8|CL)")>; // One class covers both the i8 and CL forms.
+def: InstRW<[WriteSHLDm], (instregex "SHRD(16|32|64)mr(i8|CL)")>;
+
////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
// FIXME: Split x86 and SSE load/store/moves
@@ -142,7 +166,10 @@ defm : JWriteResIntPair<WriteShift, JALU01, 1>;
def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; }
def : WriteRes<WriteStore, [JSAGU]>;
-def : WriteRes<WriteMove, [JAny]>;
+def : WriteRes<WriteMove, [JALU01]>; // WriteMove is now bound to JALU01 instead of JAny.
+
+// Treat misc copies as a move.
+def : InstRW<[WriteMove], (instrs COPY)>; // Exact opcode match (instrs), not a regex.
////////////////////////////////////////////////////////////////////////////////
// Idioms that clear a register, like xorps %xmm0, %xmm0.
@@ -168,6 +195,7 @@ defm : JWriteResIntPair<WriteJump, JALU01, 1>;
defm : JWriteResFpuPair<WriteFAdd, JFPU0, 3>;
defm : JWriteResFpuPair<WriteFMul, JFPU1, 2>;
+defm : JWriteResFpuPair<WriteFMA, JFPU1, 2>; // NOTE: Doesn't exist on Jaguar; entry uses the same arguments as WriteFMul.
defm : JWriteResFpuPair<WriteFRcp, JFPU1, 2>;
defm : JWriteResFpuPair<WriteFRsqrt, JFPU1, 2>;
defm : JWriteResFpuPair<WriteFShuffle, JFPU01, 1>;
@@ -199,11 +227,13 @@ defm : JWriteResFpuPair<WriteCvtF2F, JFPU1, 3>; // Float -> Float size conve
def : WriteRes<WriteFVarBlend, [JFPU01]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ResourceCycles = [4]; // JFPU01 occupancy raised from 2 to 4 cycles.
+ let NumMicroOps = 3; // Micro-op count is now stated explicitly.
}
def : WriteRes<WriteFVarBlendLd, [JLAGU, JFPU01]> {
let Latency = 7;
- let ResourceCycles = [1, 2];
+ let ResourceCycles = [1, 4]; // 1 cycle on JLAGU, 4 on JFPU01 (was 2).
+ let NumMicroOps = 3; // Same micro-op count as the register form.
}
// Vector integer operations.
@@ -217,21 +247,20 @@ defm : JWriteResFpuPair<WriteShuffle256, JFPU01, 1>;
def : WriteRes<WriteVarBlend, [JFPU01]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ResourceCycles = [4]; // JFPU01 occupancy raised from 2 to 4 cycles.
+ let NumMicroOps = 3; // Micro-op count is now stated explicitly.
}
def : WriteRes<WriteVarBlendLd, [JLAGU, JFPU01]> {
let Latency = 7;
- let ResourceCycles = [1, 2];
+ let ResourceCycles = [1, 4]; // 1 cycle on JLAGU, 4 on JFPU01 (was 2).
+ let NumMicroOps = 3; // Same micro-op count as the register form.
}
// FIXME: why do we need to define AVX2 resource on CPU that doesn't have AVX2?
-def : WriteRes<WriteVarVecShift, [JFPU01]> {
- let Latency = 1;
- let ResourceCycles = [1];
-}
+def : WriteRes<WriteVarVecShift, [JFPU01]> {} // Empty body: no field overrides, so the WriteRes defaults apply.
def : WriteRes<WriteVarVecShiftLd, [JLAGU, JFPU01]> {
let Latency = 6;
- let ResourceCycles = [1, 1];
+ let ResourceCycles = [1, 2]; // 1 cycle on JLAGU, 2 on JFPU01 (was 1).
}
def : WriteRes<WriteMPSAD, [JFPU0]> {
@@ -249,43 +278,49 @@ def : WriteRes<WriteMPSADLd, [JLAGU, JFPU0]> {
// FIXME: approximate latencies + pipe dependencies
////////////////////////////////////////////////////////////////////////////////
-def : WriteRes<WritePCmpIStrM, [JFPU01]> {
- let Latency = 7;
- let ResourceCycles = [2];
+def : WriteRes<WritePCmpIStrM, [JFPU1,JFPU0]> { // Names JFPU1 and JFPU0 individually instead of JFPU01.
+ let Latency = 8;
+ let ResourceCycles = [2, 2]; // 2 cycles on each of JFPU1 and JFPU0.
+ let NumMicroOps = 3;
}
-def : WriteRes<WritePCmpIStrMLd, [JLAGU, JFPU01]> {
- let Latency = 12;
- let ResourceCycles = [1, 2];
+def : WriteRes<WritePCmpIStrMLd, [JLAGU, JFPU1, JFPU0]> { // Load form: JLAGU plus JFPU1 and JFPU0.
+ let Latency = 13; // 5 more than the WritePCmpIStrM latency of 8.
+ let ResourceCycles = [1, 2, 2]; // 1 cycle on JLAGU, 2 each on JFPU1 and JFPU0.
+ let NumMicroOps = 3;
}
// Packed Compare Explicit Length Strings, Return Mask
-def : WriteRes<WritePCmpEStrM, [JFPU01]> {
- let Latency = 13;
- let ResourceCycles = [5];
+def : WriteRes<WritePCmpEStrM, [JFPU1, JLAGU, JFPU01,JFPU1, JFPU0]> { // NOTE(review): JFPU1 is listed twice and JLAGU appears in the non-load form - confirm intended.
+ let Latency = 14;
+ let ResourceCycles = [5, 5, 5, 5, 5]; // 5 cycles for each of the five listed entries.
+ let NumMicroOps = 9;
}
-def : WriteRes<WritePCmpEStrMLd, [JLAGU, JFPU01]> {
- let Latency = 18;
- let ResourceCycles = [1, 5];
+def : WriteRes<WritePCmpEStrMLd, [JLAGU, JFPU1, JLAGU, JFPU01,JFPU1, JFPU0]> { // NOTE(review): JLAGU and JFPU1 are each listed twice - confirm intended.
+ let Latency = 19; // 5 more than the WritePCmpEStrM latency of 14.
+ let ResourceCycles = [1, 5, 5, 5, 5, 5]; // 1 cycle for the leading JLAGU, 5 for every other entry.
+ let NumMicroOps = 9;
}
// Packed Compare Implicit Length Strings, Return Index
-def : WriteRes<WritePCmpIStrI, [JFPU01]> {
- let Latency = 6;
- let ResourceCycles = [2];
+def : WriteRes<WritePCmpIStrI, [JFPU1, JFPU0]> { // Names JFPU1 and JFPU0 individually; NumMicroOps is not overridden here.
+ let Latency = 7;
+ let ResourceCycles = [2, 2]; // 2 cycles on each of JFPU1 and JFPU0.
}
-def : WriteRes<WritePCmpIStrILd, [JLAGU, JFPU01]> {
- let Latency = 11;
- let ResourceCycles = [1, 2];
+def : WriteRes<WritePCmpIStrILd, [JLAGU, JFPU1, JFPU0]> { // Load form: JLAGU plus JFPU1 and JFPU0.
+ let Latency = 12; // 5 more than the WritePCmpIStrI latency of 7.
+ let ResourceCycles = [1, 2, 2]; // 1 cycle on JLAGU, 2 each on JFPU1 and JFPU0.
}
// Packed Compare Explicit Length Strings, Return Index
-def : WriteRes<WritePCmpEStrI, [JFPU01]> {
- let Latency = 13;
- let ResourceCycles = [5];
+def : WriteRes<WritePCmpEStrI, [JFPU1, JLAGU, JFPU01,JFPU1, JFPU0]> { // NOTE(review): JFPU1 is listed twice and JLAGU appears in the non-load form - confirm intended.
+ let Latency = 14;
+ let ResourceCycles = [5, 5, 5, 5, 5]; // 5 cycles for each of the five listed entries.
+ let NumMicroOps = 9;
}
-def : WriteRes<WritePCmpEStrILd, [JLAGU, JFPU01]> {
- let Latency = 18;
- let ResourceCycles = [1, 5];
+def : WriteRes<WritePCmpEStrILd, [JLAGU, JFPU1, JLAGU, JFPU01,JFPU1, JFPU0]> { // NOTE(review): JLAGU and JFPU1 are each listed twice - confirm intended.
+ let Latency = 19; // 5 more than the WritePCmpEStrI latency of 14.
+ let ResourceCycles = [1, 5, 5, 5, 5, 5]; // 1 cycle for the leading JLAGU, 5 for every other entry.
+ let NumMicroOps = 9;
}
////////////////////////////////////////////////////////////////////////////////
@@ -371,6 +406,38 @@ def : WriteRes<WriteFence, [JSAGU]>;
def : WriteRes<WriteNop, []>;
////////////////////////////////////////////////////////////////////////////////
+// SSE4.1 instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def WriteDPPS: SchedWriteRes<[JFPU0, JFPU1]> { // Sched class for (V)DPPSrri; consumes both JFPU0 and JFPU1.
+ let Latency = 11;
+ let ResourceCycles = [3,3]; // 3 cycles on each of JFPU0 and JFPU1.
+ let NumMicroOps = 5;
+}
+def : InstRW<[WriteDPPS], (instregex "(V)?DPPSrri")>; // Optional V prefix covers the VEX-named opcode too.
+
+def WriteDPPSLd: SchedWriteRes<[JLAGU, JFPU0, JFPU1]> { // Sched class for (V)DPPSrmi; adds JLAGU to the WriteDPPS resources.
+ let Latency = 16; // 5 more than the WriteDPPS latency of 11.
+ let ResourceCycles = [1,3,3]; // 1 cycle on JLAGU, 3 each on JFPU0 and JFPU1.
+ let NumMicroOps = 6; // One more micro-op than WriteDPPS.
+}
+def : InstRW<[WriteDPPSLd], (instregex "(V)?DPPSrmi")>;
+
+def WriteDPPD: SchedWriteRes<[JFPU0, JFPU1]> { // Sched class for (V)DPPDrri; consumes both JFPU0 and JFPU1.
+ let Latency = 9;
+ let ResourceCycles = [3,3]; // 3 cycles on each of JFPU0 and JFPU1.
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteDPPD], (instregex "(V)?DPPDrri")>; // Optional V prefix covers the VEX-named opcode too.
+
+def WriteDPPDLd: SchedWriteRes<[JLAGU, JFPU0, JFPU1]> { // Sched class for (V)DPPDrmi; adds JLAGU to the WriteDPPD resources.
+ let Latency = 14; // 5 more than the WriteDPPD latency of 9.
+ let ResourceCycles = [1,3,3]; // 1 cycle on JLAGU, 3 each on JFPU0 and JFPU1.
+ let NumMicroOps = 3; // Same micro-op count as WriteDPPD.
+}
+def : InstRW<[WriteDPPDLd], (instregex "(V)?DPPDrmi")>;
+
+////////////////////////////////////////////////////////////////////////////////
// SSE4A instructions.
////////////////////////////////////////////////////////////////////////////////
@@ -387,9 +454,73 @@ def WriteINSERTQ: SchedWriteRes<[JFPU01]> {
def : InstRW<[WriteINSERTQ], (instregex "INSERTQ")>;
////////////////////////////////////////////////////////////////////////////////
+// F16C instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def WriteCVT3: SchedWriteRes<[JFPU1]> { // 3-cycle class on JFPU1; only Latency is overridden.
+ let Latency = 3;
+}
+def : InstRW<[WriteCVT3], (instregex "VCVTPS2PHrr")>; // Both conversion directions share this class.
+def : InstRW<[WriteCVT3], (instregex "VCVTPH2PSrr")>;
+
+def WriteCVT3St: SchedWriteRes<[JFPU1, JSAGU]> { // Store form of WriteCVT3: adds JSAGU, the resource WriteStore uses.
+ let Latency = 3; // Same latency as the register form.
+ let ResourceCycles = [1, 1]; // 1 cycle on each of JFPU1 and JSAGU.
+}
+def : InstRW<[WriteCVT3St], (instregex "VCVTPS2PHmr")>;
+
+def WriteCVT3Ld: SchedWriteRes<[JLAGU, JFPU1]> { // Load form of WriteCVT3: adds JLAGU, the resource WriteLoad uses.
+ let Latency = 8; // 5 more than the WriteCVT3 latency of 3.
+ let ResourceCycles = [1, 1]; // 1 cycle on each of JLAGU and JFPU1.
+}
+def : InstRW<[WriteCVT3Ld], (instregex "VCVTPH2PSrm")>;
+
+def WriteCVTPS2PHY: SchedWriteRes<[JFPU1, JFPU01]> { // Sched class for VCVTPS2PHYrr.
+ let Latency = 6;
+ let ResourceCycles = [2,2]; // 2 cycles on each of JFPU1 and JFPU01.
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteCVTPS2PHY], (instregex "VCVTPS2PHYrr")>;
+
+def WriteCVTPS2PHYSt: SchedWriteRes<[JFPU1, JFPU01, JSAGU]> { // Store form of WriteCVTPS2PHY: adds JSAGU.
+ let Latency = 11; // NOTE(review): rr latency + 5 here, whereas WriteCVT3St keeps its rr latency - confirm.
+ let ResourceCycles = [2,2,1]; // 2 cycles each on JFPU1 and JFPU01, 1 on JSAGU.
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteCVTPS2PHYSt], (instregex "VCVTPS2PHYmr")>;
+
+def WriteCVTPH2PSY: SchedWriteRes<[JFPU1]> { // Sched class for VCVTPH2PSYrr.
+ let Latency = 3;
+ let ResourceCycles = [2]; // JFPU1 is consumed for 2 cycles.
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteCVTPH2PSY], (instregex "VCVTPH2PSYrr")>;
+
+def WriteCVTPH2PSYLd: SchedWriteRes<[JLAGU, JFPU1]> { // Load form of WriteCVTPH2PSY: adds JLAGU.
+ let Latency = 8; // 5 more than the WriteCVTPH2PSY latency of 3.
+ let ResourceCycles = [1,2]; // 1 cycle on JLAGU, 2 on JFPU1.
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteCVTPH2PSYLd], (instregex "VCVTPH2PSYrm")>;
+
+////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////
+def WriteVDPPSY: SchedWriteRes<[JFPU1, JFPU0]> { // Sched class for VDPPSYrr.
+ let Latency = 12;
+ let ResourceCycles = [6, 6]; // 6 cycles on each of JFPU1 and JFPU0 (WriteDPPS uses 3 each).
+ let NumMicroOps = 10; // Twice the WriteDPPS micro-op count of 5.
+}
+def : InstRW<[WriteVDPPSY], (instregex "VDPPSYrr")>;
+
+def WriteVDPPSYLd: SchedWriteRes<[JLAGU, JFPU1, JFPU0]> { // Load form of WriteVDPPSY: adds JLAGU.
+ let Latency = 17; // 5 more than the WriteVDPPSY latency of 12.
+ let ResourceCycles = [1, 6, 6]; // 1 cycle on JLAGU, 6 each on JFPU1 and JFPU0.
+ let NumMicroOps = 11; // One more micro-op than WriteVDPPSY.
+}
+def : InstRW<[WriteVDPPSYLd, ReadAfterLd], (instregex "VDPPSYrm")>; // ReadAfterLd is attached alongside the write.
+
def WriteFAddY: SchedWriteRes<[JFPU0]> {
let Latency = 3;
let ResourceCycles = [2];
@@ -438,6 +569,152 @@ def WriteVMULYPSLd: SchedWriteRes<[JLAGU, JFPU1]> {
}
def : InstRW<[WriteVMULYPSLd, ReadAfterLd], (instregex "VMULPSYrm", "VRCPPSYm", "VRSQRTPSYm")>;
+def WriteVCVTY: SchedWriteRes<[JSTC]> { // Shared class for the YMM register-form convert/round opcodes mapped below.
+ let Latency = 3;
+ let ResourceCycles = [2]; // JSTC is consumed for 2 cycles.
+}
+def : InstRW<[WriteVCVTY], (instregex "VCVTDQ2P(S|D)Yrr")>;
+def : InstRW<[WriteVCVTY], (instregex "VROUNDYP(S|D)r")>; // VROUND is grouped with the conversions.
+def : InstRW<[WriteVCVTY], (instregex "VCVTPS2DQYrr")>;
+def : InstRW<[WriteVCVTY], (instregex "VCVTTPS2DQYrr")>;
+
+def WriteVCVTYLd: SchedWriteRes<[JLAGU, JSTC]> { // Load form of WriteVCVTY: adds JLAGU.
+ let Latency = 8; // 5 more than the WriteVCVTY latency of 3.
+ let ResourceCycles = [1, 2]; // 1 cycle on JLAGU, 2 on JSTC.
+}
+def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VCVTDQ2P(S|D)Yrm")>; // Same opcode families as WriteVCVTY, memory forms.
+def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VROUNDYP(S|D)m")>;
+def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VCVTPS2DQYrm")>;
+def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VCVTTPS2DQYrm")>;
+
+def WriteVMONTPSt: SchedWriteRes<[JSTC, JSAGU]> { // Class for the VMOVNT*Ymr store forms; uses JSAGU (the resource WriteStore uses) rather than JLAGU (the WriteLoad resource).
+ let Latency = 3;
+ let ResourceCycles = [2,1]; // 2 cycles on JSTC, 1 on JSAGU.
+}
+def : InstRW<[WriteVMONTPSt], (instregex "VMOVNTP(S|D)Ymr")>; // NOTE(review): "MONT" in the class name looks like a transposition of "MOVNT"; name kept for compatibility.
+def : InstRW<[WriteVMONTPSt], (instregex "VMOVNTDQYmr")>;
+
+def WriteVCVTPDY: SchedWriteRes<[JSTC, JFPU01]> { // Class for the VCVT(T)PD2...Yrr opcodes mapped below.
+ let Latency = 6;
+ let ResourceCycles = [2, 4]; // 2 cycles on JSTC, 4 on JFPU01.
+}
+def : InstRW<[WriteVCVTPDY], (instregex "VCVTPD2(DQ|PS)Yrr")>;
+def : InstRW<[WriteVCVTPDY], (instregex "VCVTTPD2DQYrr")>; // Truncating variant shares the class.
+
+def WriteVCVTPDYLd: SchedWriteRes<[JLAGU, JSTC, JFPU01]> { // Load form of WriteVCVTPDY: adds JLAGU.
+ let Latency = 11; // 5 more than the WriteVCVTPDY latency of 6.
+ let ResourceCycles = [1, 2, 4]; // 1 cycle on JLAGU, 2 on JSTC, 4 on JFPU01.
+}
+def : InstRW<[WriteVCVTPDYLd, ReadAfterLd], (instregex "VCVTPD2(DQ|PS)Yrm")>;
+def : InstRW<[WriteVCVTPDYLd, ReadAfterLd], (instregex "VCVTTPD2DQYrm")>;
+
+def WriteVBlendVPY: SchedWriteRes<[JFPU01]> { // Class for VBLENDVP*Yrr; also reused for VPERMILP*Yrr below.
+ let Latency = 3;
+ let ResourceCycles = [6]; // JFPU01 is consumed for 6 cycles.
+}
+def : InstRW<[WriteVBlendVPY], (instregex "VBLENDVP(S|D)Yrr", "VPERMILP(D|S)Yrr")>; // Two regexes in one mapping.
+
+def WriteVBlendVPYLd: SchedWriteRes<[JLAGU, JFPU01]> { // Load form of WriteVBlendVPY: adds JLAGU.
+ let Latency = 8; // 5 more than the WriteVBlendVPY latency of 3.
+ let ResourceCycles = [1, 6]; // 1 cycle on JLAGU, 6 on JFPU01.
+}
+def : InstRW<[WriteVBlendVPYLd, ReadAfterLd], (instregex "VBLENDVP(S|D)Yrm")>; // Only VBLENDVP is mapped here; no VPERMILP memory form.
+
+def WriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01]> { // Class for VBROADCASTS(S|D)Yrm.
+ let Latency = 6;
+ let ResourceCycles = [1, 4]; // 1 cycle on JLAGU, 4 on JFPU01.
+}
+def : InstRW<[WriteVBROADCASTYLd, ReadAfterLd], (instregex "VBROADCASTS(S|D)Yrm")>;
+
+def WriteFPAY22: SchedWriteRes<[JFPU0]> { // Latency 2 and 2 cycles on JFPU0, matching the "22" in the name.
+ let Latency = 2;
+ let ResourceCycles = [2];
+}
+def : InstRW<[WriteFPAY22], (instregex "VCMPP(S|D)Yrri", "VM(AX|IN)P(D|S)Yrr")>; // YMM compare and max/min register forms.
+
+def WriteFPAY22Ld: SchedWriteRes<[JLAGU, JFPU0]> { // Load form of WriteFPAY22: adds JLAGU.
+ let Latency = 7; // 5 more than the WriteFPAY22 latency of 2.
+ let ResourceCycles = [1, 2]; // 1 cycle on JLAGU, 2 on JFPU0.
+}
+def : InstRW<[WriteFPAY22Ld, ReadAfterLd], (instregex "VCMPP(S|D)Yrmi", "VM(AX|IN)P(D|S)Yrm")>;
+
+def WriteVHAddSubY: SchedWriteRes<[JFPU0]> { // Class for VHADD/VHSUB P(D|S) Yrr.
+ let Latency = 3;
+ let ResourceCycles = [2]; // JFPU0 is consumed for 2 cycles.
+}
+def : InstRW<[WriteVHAddSubY], (instregex "VH(ADD|SUB)P(D|S)Yrr")>;
+
+def WriteVHAddSubYLd: SchedWriteRes<[JLAGU, JFPU0]> { // Load form of WriteVHAddSubY: adds JLAGU.
+ let Latency = 8; // 5 more than the WriteVHAddSubY latency of 3.
+ let ResourceCycles = [1, 2]; // 1 cycle on JLAGU, 2 on JFPU0.
+}
+def : InstRW<[WriteVHAddSubYLd], (instregex "VH(ADD|SUB)P(D|S)Yrm")>; // NOTE(review): no ReadAfterLd here, unlike the WriteFPAY22Ld mapping - confirm intended.
+
+def WriteVMaskMovLd: SchedWriteRes<[JLAGU,JFPU01]> { // Class for VMASKMOVP(D|S)rm.
+ let Latency = 6;
+ let ResourceCycles = [1, 2]; // 1 cycle on JLAGU, 2 on JFPU01.
+}
+def : InstRW<[WriteVMaskMovLd], (instregex "VMASKMOVP(D|S)rm")>;
+
+def WriteVMaskMovYLd: SchedWriteRes<[JLAGU,JFPU01]> { // Class for VMASKMOVP(D|S)Yrm.
+ let Latency = 6; // Same latency as WriteVMaskMovLd.
+ let ResourceCycles = [1, 4]; // 1 cycle on JLAGU, 4 on JFPU01 (WriteVMaskMovLd uses 2).
+}
+def : InstRW<[WriteVMaskMovYLd], (instregex "VMASKMOVP(D|S)Yrm")>;
+
+def WriteVMaskMovSt: SchedWriteRes<[JFPU01,JSAGU]> { // Class for VMASKMOVP(D|S)mr.
+ let Latency = 6;
+ let ResourceCycles = [4, 1]; // 4 cycles on JFPU01, 1 on JSAGU. NOTE(review): identical to WriteVMaskMovYSt, while the load classes differ (2 vs 4) - confirm.
+}
+def : InstRW<[WriteVMaskMovSt], (instregex "VMASKMOVP(D|S)mr")>;
+
+def WriteVMaskMovYSt: SchedWriteRes<[JFPU01,JSAGU]> { // Class for VMASKMOVP(D|S)Ymr.
+ let Latency = 6;
+ let ResourceCycles = [4, 1]; // 4 cycles on JFPU01, 1 on JSAGU.
+}
+def : InstRW<[WriteVMaskMovYSt], (instregex "VMASKMOVP(D|S)Ymr")>;
+
+// TODO: In fact we have latency '2+i'. The +i represents an additional 1 cycle transfer
+// operation which moves the floating point result to the integer unit. During this
+// additional cycle the floating point unit execution resources are not occupied
+// and ALU0 in the integer unit is occupied instead.
+def WriteVMOVMSK: SchedWriteRes<[JFPU0]> { // Only JFPU0 is modeled; the ALU0 cycle from the TODO above is not.
+ let Latency = 3; // Presumably the '2+i' from the TODO above folded into one number - confirm.
+}
+def : InstRW<[WriteVMOVMSK], (instregex "VMOVMSKP(D|S)(Y)?rr")>; // Optional Y: one class for both opcode spellings.
+
+// TODO: In fact we have latency '3+i'. The +i represents an additional 1 cycle transfer
+// operation which moves the floating point result to the integer unit. During this
+// additional cycle the floating point unit execution resources are not occupied
+// and ALU0 in the integer unit is occupied instead.
+def WriteVTESTY: SchedWriteRes<[JFPU01, JFPU0]> { // Class for the YMM VTESTP*/VPTEST register forms.
+ let Latency = 4; // Presumably the '3+i' from the TODO above folded into one number - confirm.
+ let ResourceCycles = [2, 2]; // 2 cycles on each of JFPU01 and JFPU0.
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteVTESTY], (instregex "VTESTP(S|D)Yrr")>;
+def : InstRW<[WriteVTESTY], (instregex "VPTESTYrr")>; // VPTEST shares the VTESTP class.
+
+def WriteVTESTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPU0]> { // Load form of WriteVTESTY: adds JLAGU.
+ let Latency = 9; // 5 more than the WriteVTESTY latency of 4.
+ let ResourceCycles = [1, 2, 2]; // 1 cycle on JLAGU, 2 each on JFPU01 and JFPU0.
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteVTESTYLd], (instregex "VTESTP(S|D)Yrm")>;
+def : InstRW<[WriteVTESTYLd], (instregex "VPTESTYrm")>;
+
+def WriteVTEST: SchedWriteRes<[JFPU0]> { // Class for the non-Y VTESTP*/VPTEST register forms; only Latency is overridden.
+ let Latency = 3;
+}
+def : InstRW<[WriteVTEST], (instregex "VTESTP(S|D)rr")>;
+def : InstRW<[WriteVTEST], (instregex "VPTESTrr")>; // VPTEST shares the VTESTP class.
+
+def WriteVTESTLd: SchedWriteRes<[JLAGU, JFPU0]> { // Load form of WriteVTEST: adds JLAGU; ResourceCycles is not overridden.
+ let Latency = 8; // 5 more than the WriteVTEST latency of 3.
+}
+def : InstRW<[WriteVTESTLd], (instregex "VTESTP(S|D)rm")>;
+def : InstRW<[WriteVTESTLd], (instregex "VPTESTrm")>;
+
def WriteVSQRTYPD: SchedWriteRes<[JFPU1]> {
let Latency = 54;
let ResourceCycles = [54];
@@ -462,5 +739,16 @@ def WriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1]> {
}
def : InstRW<[WriteVSQRTYPSLd], (instregex "VSQRTPSYm")>;
+def WriteJVZEROALL: SchedWriteRes<[]> { // Empty resource list: only latency and micro-op count are modeled.
+ let Latency = 90;
+ let NumMicroOps = 73;
+}
+def : InstRW<[WriteJVZEROALL], (instregex "VZEROALL")>;
+
+def WriteJVZEROUPPER: SchedWriteRes<[]> { // Empty resource list: only latency and micro-op count are modeled.
+ let Latency = 46;
+ let NumMicroOps = 37;
+}
+def : InstRW<[WriteJVZEROUPPER], (instregex "VZEROUPPER")>;
} // SchedModel