Diffstat (limited to 'contrib/llvm/lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/X86/X86InstrInfo.cpp | 1565
1 file changed, 1179 insertions, 386 deletions
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp index 627b6120b048..7b456fd68343 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -414,17 +414,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VEXTRACTI64x2Zrr,X86::VEXTRACTI64x2Zmr, TB_FOLDED_STORE }, { X86::VEXTRACTI64x4Zrr,X86::VEXTRACTI64x4Zmr, TB_FOLDED_STORE }, { X86::VEXTRACTPSZrr, X86::VEXTRACTPSZmr, TB_FOLDED_STORE }, - { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE }, { X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 }, { X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 }, { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 }, { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE | TB_ALIGN_64 }, - { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE }, - { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE }, { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zmr, TB_FOLDED_STORE }, { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE }, { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE }, { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE }, + { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE }, + { X86::VMOVPQIto64Zrr, X86::VMOVPQI2QIZmr, TB_FOLDED_STORE }, + { X86::VMOVSDto64Zrr, X86::VMOVSDto64Zmr, TB_FOLDED_STORE }, + { X86::VMOVSS2DIZrr, X86::VMOVSS2DIZmr, TB_FOLDED_STORE }, + { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE }, + { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE }, + { X86::VPEXTRDZrr, X86::VPEXTRDZmr, TB_FOLDED_STORE }, + { X86::VPEXTRQZrr, X86::VPEXTRQZmr, TB_FOLDED_STORE }, { X86::VPMOVDBZrr, X86::VPMOVDBZmr, TB_FOLDED_STORE }, { X86::VPMOVDWZrr, X86::VPMOVDWZmr, TB_FOLDED_STORE }, { X86::VPMOVQDZrr, X86::VPMOVQDZmr, TB_FOLDED_STORE }, @@ -867,11 +872,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) // AVX-512 foldable instructions { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE }, - { X86::VBROADCASTSSZr_s, X86::VBROADCASTSSZm, TB_NO_REVERSE }, { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE }, - { X86::VBROADCASTSDZr_s, X86::VBROADCASTSDZm, TB_NO_REVERSE }, { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 }, - { X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm, TB_NO_REVERSE }, + { X86::VMOV64toSDZrr, X86::VMOV64toSDZrm, 0 }, + { X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 }, { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 }, { X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 }, { X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 }, @@ -883,8 +887,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 }, { X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 }, { X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 }, + { X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm, TB_NO_REVERSE }, + { X86::VPABSBZrr, X86::VPABSBZrm, 0 }, { X86::VPABSDZrr, X86::VPABSDZrm, 0 }, { X86::VPABSQZrr, X86::VPABSQZrm, 0 }, + { X86::VPABSWZrr, X86::VPABSWZrm, 0 }, { X86::VPERMILPDZri, X86::VPERMILPDZmi, 0 }, { X86::VPERMILPSZri, X86::VPERMILPSZmi, 0 }, { X86::VPERMPDZri, X86::VPERMPDZmi, 0 }, @@ -904,12 +911,21 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSHUFDZri, X86::VPSHUFDZmi, 0 }, { X86::VPSHUFHWZri, X86::VPSHUFHWZmi, 0 }, { X86::VPSHUFLWZri, X86::VPSHUFLWZmi, 0 }, + { X86::VPSLLDQZ512rr, X86::VPSLLDQZ512rm, 0 }, + { X86::VPSLLDZri, X86::VPSLLDZmi, 0 }, + { X86::VPSLLQZri, X86::VPSLLQZmi, 0 }, + { X86::VPSLLWZri, X86::VPSLLWZmi, 0 }, + { X86::VPSRADZri, X86::VPSRADZmi, 0 }, + { X86::VPSRAQZri, X86::VPSRAQZmi, 0 }, 
+ { X86::VPSRAWZri, X86::VPSRAWZmi, 0 }, + { X86::VPSRLDQZ512rr, X86::VPSRLDQZ512rm, 0 }, + { X86::VPSRLDZri, X86::VPSRLDZmi, 0 }, + { X86::VPSRLQZri, X86::VPSRLQZmi, 0 }, + { X86::VPSRLWZri, X86::VPSRLWZmi, 0 }, // AVX-512 foldable instructions (256-bit versions) { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE }, - { X86::VBROADCASTSSZ256r_s, X86::VBROADCASTSSZ256m, TB_NO_REVERSE }, { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE }, - { X86::VBROADCASTSDZ256r_s, X86::VBROADCASTSDZ256m, TB_NO_REVERSE }, { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 }, { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 }, { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 }, @@ -920,6 +936,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 }, { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 }, { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 }, + { X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0 }, + { X86::VPABSDZ256rr, X86::VPABSDZ256rm, 0 }, + { X86::VPABSQZ256rr, X86::VPABSQZ256rm, 0 }, + { X86::VPABSWZ256rr, X86::VPABSWZ256rm, 0 }, { X86::VPERMILPDZ256ri, X86::VPERMILPDZ256mi, 0 }, { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256mi, 0 }, { X86::VPERMPDZ256ri, X86::VPERMPDZ256mi, 0 }, @@ -939,10 +959,20 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSHUFDZ256ri, X86::VPSHUFDZ256mi, 0 }, { X86::VPSHUFHWZ256ri, X86::VPSHUFHWZ256mi, 0 }, { X86::VPSHUFLWZ256ri, X86::VPSHUFLWZ256mi, 0 }, + { X86::VPSLLDQZ256rr, X86::VPSLLDQZ256rm, 0 }, + { X86::VPSLLDZ256ri, X86::VPSLLDZ256mi, 0 }, + { X86::VPSLLQZ256ri, X86::VPSLLQZ256mi, 0 }, + { X86::VPSLLWZ256ri, X86::VPSLLWZ256mi, 0 }, + { X86::VPSRADZ256ri, X86::VPSRADZ256mi, 0 }, + { X86::VPSRAQZ256ri, X86::VPSRAQZ256mi, 0 }, + { X86::VPSRAWZ256ri, X86::VPSRAWZ256mi, 0 }, + { X86::VPSRLDQZ256rr, X86::VPSRLDQZ256rm, 0 }, + { X86::VPSRLDZ256ri, X86::VPSRLDZ256mi, 0 }, + { X86::VPSRLQZ256ri, X86::VPSRLQZ256mi, 0 }, + { X86::VPSRLWZ256ri, X86::VPSRLWZ256mi, 0 }, // AVX-512 foldable instructions (128-bit versions) { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE }, - { X86::VBROADCASTSSZ128r_s, X86::VBROADCASTSSZ128m, TB_NO_REVERSE }, { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 }, { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 }, { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 }, @@ -953,6 +983,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 }, { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 }, { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 }, + { X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0 }, + { X86::VPABSDZ128rr, X86::VPABSDZ128rm, 0 }, + { X86::VPABSQZ128rr, X86::VPABSQZ128rm, 0 }, + { X86::VPABSWZ128rr, X86::VPABSWZ128rm, 0 }, { X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 }, { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128mi, 0 }, { X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, TB_NO_REVERSE }, @@ -970,6 +1004,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSHUFDZ128ri, X86::VPSHUFDZ128mi, 0 }, { X86::VPSHUFHWZ128ri, X86::VPSHUFHWZ128mi, 0 }, { X86::VPSHUFLWZ128ri, X86::VPSHUFLWZ128mi, 0 }, + { X86::VPSLLDQZ128rr, X86::VPSLLDQZ128rm, 0 }, + { X86::VPSLLDZ128ri, X86::VPSLLDZ128mi, 0 }, + { X86::VPSLLQZ128ri, X86::VPSLLQZ128mi, 0 }, + { X86::VPSLLWZ128ri, X86::VPSLLWZ128mi, 0 }, + { X86::VPSRADZ128ri, X86::VPSRADZ128mi, 0 }, + { X86::VPSRAQZ128ri, X86::VPSRAQZ128mi, 0 }, + { X86::VPSRAWZ128ri, X86::VPSRAWZ128mi, 0 }, + { X86::VPSRLDQZ128rr, X86::VPSRLDQZ128rm, 0 }, + { X86::VPSRLDZ128ri, 
X86::VPSRLDZ128mi, 0 }, + { X86::VPSRLQZ128ri, X86::VPSRLQZ128mi, 0 }, + { X86::VPSRLWZ128ri, X86::VPSRLWZ128mi, 0 }, // F16C foldable instructions { X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, 0 }, @@ -1170,18 +1215,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::PINSRWrri, X86::PINSRWrmi, 0 }, { X86::PMADDUBSWrr, X86::PMADDUBSWrm, TB_ALIGN_16 }, { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 }, + { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 }, + { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 }, { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 }, { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 }, - { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 }, - { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 }, + { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 }, + { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 }, { X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 }, { X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 }, + { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 }, + { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 }, { X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 }, { X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 }, - { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 }, - { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 }, - { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 }, - { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 }, { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 }, { X86::PMULHRSWrr, X86::PMULHRSWrm, TB_ALIGN_16 }, { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 }, @@ -1340,8 +1385,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::PMULHRWrr, X86::PMULHRWrm, 0 }, // AVX 128-bit versions of foldable instructions - { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 }, - { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, TB_NO_REVERSE }, { X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 }, { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 }, { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 }, @@ -1350,8 +1393,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 }, { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 }, { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 }, - { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 }, - { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, TB_NO_REVERSE }, { X86::VADDPDrr, X86::VADDPDrm, 0 }, { X86::VADDPSrr, X86::VADDPSrm, 0 }, { X86::VADDSDrr, X86::VADDSDrm, 0 }, @@ -1458,18 +1499,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPINSRWrri, X86::VPINSRWrmi, 0 }, { X86::VPMADDUBSWrr, X86::VPMADDUBSWrm, 0 }, { X86::VPMADDWDrr, X86::VPMADDWDrm, 0 }, + { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 }, + { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 }, { X86::VPMAXSWrr, X86::VPMAXSWrm, 0 }, { X86::VPMAXUBrr, X86::VPMAXUBrm, 0 }, - { X86::VPMINSWrr, X86::VPMINSWrm, 0 }, - { X86::VPMINUBrr, X86::VPMINUBrm, 0 }, + { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 }, + { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 }, { X86::VPMINSBrr, X86::VPMINSBrm, 0 }, { X86::VPMINSDrr, X86::VPMINSDrm, 0 }, + { X86::VPMINSWrr, X86::VPMINSWrm, 0 }, + { X86::VPMINUBrr, X86::VPMINUBrm, 0 }, { X86::VPMINUDrr, X86::VPMINUDrm, 0 }, { X86::VPMINUWrr, X86::VPMINUWrm, 0 }, - { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 }, - { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 }, - { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 }, - { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 }, { X86::VPMULDQrr, X86::VPMULDQrm, 0 }, { X86::VPMULHRSWrr, X86::VPMULHRSWrm, 0 }, { X86::VPMULHUWrr, X86::VPMULHUWrm, 0 }, @@ -1626,18 +1667,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 }, { X86::VPMADDUBSWYrr, X86::VPMADDUBSWYrm, 0 }, { X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 }, + { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 
}, + { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 }, { X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 }, { X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 }, - { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 }, - { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 }, + { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 }, + { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 }, { X86::VPMINSBYrr, X86::VPMINSBYrm, 0 }, { X86::VPMINSDYrr, X86::VPMINSDYrm, 0 }, + { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 }, + { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 }, { X86::VPMINUDYrr, X86::VPMINUDYrm, 0 }, { X86::VPMINUWYrr, X86::VPMINUWYrm, 0 }, - { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 }, - { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 }, - { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 }, - { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 }, { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 }, { X86::VPMULDQYrr, X86::VPMULDQYrm, 0 }, { X86::VPMULHRSWYrr, X86::VPMULHRSWYrm, 0 }, @@ -1732,7 +1773,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) // XOP foldable instructions { X86::VPCMOVrrr, X86::VPCMOVrmr, 0 }, - { X86::VPCMOVrrrY, X86::VPCMOVrmrY, 0 }, + { X86::VPCMOVYrrr, X86::VPCMOVYrmr, 0 }, { X86::VPCOMBri, X86::VPCOMBmi, 0 }, { X86::VPCOMDri, X86::VPCOMDmi, 0 }, { X86::VPCOMQri, X86::VPCOMQmi, 0 }, @@ -1742,9 +1783,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPCOMUQri, X86::VPCOMUQmi, 0 }, { X86::VPCOMUWri, X86::VPCOMUWmi, 0 }, { X86::VPERMIL2PDrr, X86::VPERMIL2PDmr, 0 }, - { X86::VPERMIL2PDrrY, X86::VPERMIL2PDmrY, 0 }, + { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYmr, 0 }, { X86::VPERMIL2PSrr, X86::VPERMIL2PSmr, 0 }, - { X86::VPERMIL2PSrrY, X86::VPERMIL2PSmrY, 0 }, + { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYmr, 0 }, { X86::VPMACSDDrr, X86::VPMACSDDrm, 0 }, { X86::VPMACSDQHrr, X86::VPMACSDQHrm, 0 }, { X86::VPMACSDQLrr, X86::VPMACSDQLrm, 0 }, @@ -1800,8 +1841,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VANDNPSZrr, X86::VANDNPSZrm, 0 }, { X86::VANDPDZrr, X86::VANDPDZrm, 0 }, { X86::VANDPSZrr, X86::VANDPSZrm, 0 }, - { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE }, - { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE }, { X86::VCMPPDZrri, X86::VCMPPDZrmi, 0 }, { X86::VCMPPSZrri, X86::VCMPPSZrmi, 0 }, { X86::VCMPSDZrr, X86::VCMPSDZrm, 0 }, @@ -1842,6 +1881,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMINSDZrr_Int, X86::VMINSDZrm_Int, TB_NO_REVERSE }, { X86::VMINSSZrr, X86::VMINSSZrm, 0 }, { X86::VMINSSZrr_Int, X86::VMINSSZrm_Int, TB_NO_REVERSE }, + { X86::VMOVLHPSZrr, X86::VMOVHPSZ128rm, TB_NO_REVERSE }, { X86::VMULPDZrr, X86::VMULPDZrm, 0 }, { X86::VMULPSZrr, X86::VMULPSZrm, 0 }, { X86::VMULSDZrr, X86::VMULSDZrm, 0 }, @@ -1850,6 +1890,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMULSSZrr_Int, X86::VMULSSZrm_Int, TB_NO_REVERSE }, { X86::VORPDZrr, X86::VORPDZrm, 0 }, { X86::VORPSZrr, X86::VORPSZrm, 0 }, + { X86::VPACKSSDWZrr, X86::VPACKSSDWZrm, 0 }, + { X86::VPACKSSWBZrr, X86::VPACKSSWBZrm, 0 }, + { X86::VPACKUSDWZrr, X86::VPACKUSDWZrm, 0 }, + { X86::VPACKUSWBZrr, X86::VPACKUSWBZrm, 0 }, { X86::VPADDBZrr, X86::VPADDBZrm, 0 }, { X86::VPADDDZrr, X86::VPADDDZrm, 0 }, { X86::VPADDQZrr, X86::VPADDQZrm, 0 }, @@ -1863,6 +1907,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPANDNDZrr, X86::VPANDNDZrm, 0 }, { X86::VPANDNQZrr, X86::VPANDNQZrm, 0 }, { X86::VPANDQZrr, X86::VPANDQZrm, 0 }, + { X86::VPAVGBZrr, X86::VPAVGBZrm, 0 }, + { X86::VPAVGWZrr, X86::VPAVGWZrm, 0 }, { X86::VPCMPBZrri, X86::VPCMPBZrmi, 0 }, { X86::VPCMPDZrri, X86::VPCMPDZrmi, 0 }, { X86::VPCMPEQBZrr, X86::VPCMPEQBZrm, 0 }, @@ -1887,26 +1933,55 @@ X86InstrInfo::X86InstrInfo(X86Subtarget 
&STI) { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 }, { X86::VPERMQZrr, X86::VPERMQZrm, 0 }, { X86::VPERMWZrr, X86::VPERMWZrm, 0 }, + { X86::VPINSRBZrr, X86::VPINSRBZrm, 0 }, + { X86::VPINSRDZrr, X86::VPINSRDZrm, 0 }, + { X86::VPINSRQZrr, X86::VPINSRQZrm, 0 }, + { X86::VPINSRWZrr, X86::VPINSRWZrm, 0 }, { X86::VPMADDUBSWZrr, X86::VPMADDUBSWZrm, 0 }, { X86::VPMADDWDZrr, X86::VPMADDWDZrm, 0 }, + { X86::VPMAXSBZrr, X86::VPMAXSBZrm, 0 }, { X86::VPMAXSDZrr, X86::VPMAXSDZrm, 0 }, { X86::VPMAXSQZrr, X86::VPMAXSQZrm, 0 }, + { X86::VPMAXSWZrr, X86::VPMAXSWZrm, 0 }, + { X86::VPMAXUBZrr, X86::VPMAXUBZrm, 0 }, { X86::VPMAXUDZrr, X86::VPMAXUDZrm, 0 }, { X86::VPMAXUQZrr, X86::VPMAXUQZrm, 0 }, + { X86::VPMAXUWZrr, X86::VPMAXUWZrm, 0 }, + { X86::VPMINSBZrr, X86::VPMINSBZrm, 0 }, { X86::VPMINSDZrr, X86::VPMINSDZrm, 0 }, { X86::VPMINSQZrr, X86::VPMINSQZrm, 0 }, + { X86::VPMINSWZrr, X86::VPMINSWZrm, 0 }, + { X86::VPMINUBZrr, X86::VPMINUBZrm, 0 }, { X86::VPMINUDZrr, X86::VPMINUDZrm, 0 }, { X86::VPMINUQZrr, X86::VPMINUQZrm, 0 }, + { X86::VPMINUWZrr, X86::VPMINUWZrm, 0 }, { X86::VPMULDQZrr, X86::VPMULDQZrm, 0 }, + { X86::VPMULLDZrr, X86::VPMULLDZrm, 0 }, + { X86::VPMULLQZrr, X86::VPMULLQZrm, 0 }, + { X86::VPMULLWZrr, X86::VPMULLWZrm, 0 }, { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 }, { X86::VPORDZrr, X86::VPORDZrm, 0 }, { X86::VPORQZrr, X86::VPORQZrm, 0 }, + { X86::VPSADBWZ512rr, X86::VPSADBWZ512rm, 0 }, { X86::VPSHUFBZrr, X86::VPSHUFBZrm, 0 }, + { X86::VPSLLDZrr, X86::VPSLLDZrm, 0 }, + { X86::VPSLLQZrr, X86::VPSLLQZrm, 0 }, { X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 }, { X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 }, + { X86::VPSLLVWZrr, X86::VPSLLVWZrm, 0 }, + { X86::VPSLLWZrr, X86::VPSLLWZrm, 0 }, + { X86::VPSRADZrr, X86::VPSRADZrm, 0 }, + { X86::VPSRAQZrr, X86::VPSRAQZrm, 0 }, { X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 }, + { X86::VPSRAVQZrr, X86::VPSRAVQZrm, 0 }, + { X86::VPSRAVWZrr, X86::VPSRAVWZrm, 0 }, + { X86::VPSRAWZrr, X86::VPSRAWZrm, 0 }, + { X86::VPSRLDZrr, X86::VPSRLDZrm, 0 }, + { X86::VPSRLQZrr, X86::VPSRLQZrm, 0 }, { X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 }, { X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 }, + { X86::VPSRLVWZrr, X86::VPSRLVWZrm, 0 }, + { X86::VPSRLWZrr, X86::VPSRLWZrm, 0 }, { X86::VPSUBBZrr, X86::VPSUBBZrm, 0 }, { X86::VPSUBDZrr, X86::VPSUBDZrm, 0 }, { X86::VPSUBQZrr, X86::VPSUBQZrm, 0 }, @@ -1957,9 +2032,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VANDPDZ256rr, X86::VANDPDZ256rm, 0 }, { X86::VANDPSZ128rr, X86::VANDPSZ128rm, 0 }, { X86::VANDPSZ256rr, X86::VANDPSZ256rm, 0 }, - { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE }, - { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE }, - { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE }, { X86::VCMPPDZ128rri, X86::VCMPPDZ128rmi, 0 }, { X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0 }, { X86::VCMPPSZ128rri, X86::VCMPPSZ128rmi, 0 }, @@ -1996,6 +2068,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VORPDZ256rr, X86::VORPDZ256rm, 0 }, { X86::VORPSZ128rr, X86::VORPSZ128rm, 0 }, { X86::VORPSZ256rr, X86::VORPSZ256rm, 0 }, + { X86::VPACKSSDWZ256rr, X86::VPACKSSDWZ256rm, 0 }, + { X86::VPACKSSDWZ128rr, X86::VPACKSSDWZ128rm, 0 }, + { X86::VPACKSSWBZ256rr, X86::VPACKSSWBZ256rm, 0 }, + { X86::VPACKSSWBZ128rr, X86::VPACKSSWBZ128rm, 0 }, + { X86::VPACKUSDWZ256rr, X86::VPACKUSDWZ256rm, 0 }, + { X86::VPACKUSDWZ128rr, X86::VPACKUSDWZ128rm, 0 }, + { X86::VPACKUSWBZ256rr, X86::VPACKUSWBZ256rm, 0 }, + { X86::VPACKUSWBZ128rr, X86::VPACKUSWBZ128rm, 0 }, { X86::VPADDBZ128rr, X86::VPADDBZ128rm, 0 }, { X86::VPADDBZ256rr, 
X86::VPADDBZ256rm, 0 }, { X86::VPADDDZ128rr, X86::VPADDDZ128rm, 0 }, @@ -2022,6 +2102,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPANDNQZ256rr, X86::VPANDNQZ256rm, 0 }, { X86::VPANDQZ128rr, X86::VPANDQZ128rm, 0 }, { X86::VPANDQZ256rr, X86::VPANDQZ256rm, 0 }, + { X86::VPAVGBZ128rr, X86::VPAVGBZ128rm, 0 }, + { X86::VPAVGBZ256rr, X86::VPAVGBZ256rm, 0 }, + { X86::VPAVGWZ128rr, X86::VPAVGWZ128rm, 0 }, + { X86::VPAVGWZ256rr, X86::VPAVGWZ256rm, 0 }, { X86::VPCMPBZ128rri, X86::VPCMPBZ128rmi, 0 }, { X86::VPCMPBZ256rri, X86::VPCMPBZ256rmi, 0 }, { X86::VPCMPDZ128rri, X86::VPCMPDZ128rmi, 0 }, @@ -2070,12 +2154,92 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPMADDUBSWZ256rr, X86::VPMADDUBSWZ256rm, 0 }, { X86::VPMADDWDZ128rr, X86::VPMADDWDZ128rm, 0 }, { X86::VPMADDWDZ256rr, X86::VPMADDWDZ256rm, 0 }, + { X86::VPMAXSBZ128rr, X86::VPMAXSBZ128rm, 0 }, + { X86::VPMAXSBZ256rr, X86::VPMAXSBZ256rm, 0 }, + { X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rm, 0 }, + { X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rm, 0 }, + { X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rm, 0 }, + { X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rm, 0 }, + { X86::VPMAXSWZ128rr, X86::VPMAXSWZ128rm, 0 }, + { X86::VPMAXSWZ256rr, X86::VPMAXSWZ256rm, 0 }, + { X86::VPMAXUBZ128rr, X86::VPMAXUBZ128rm, 0 }, + { X86::VPMAXUBZ256rr, X86::VPMAXUBZ256rm, 0 }, + { X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rm, 0 }, + { X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rm, 0 }, + { X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rm, 0 }, + { X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rm, 0 }, + { X86::VPMAXUWZ128rr, X86::VPMAXUWZ128rm, 0 }, + { X86::VPMAXUWZ256rr, X86::VPMAXUWZ256rm, 0 }, + { X86::VPMINSBZ128rr, X86::VPMINSBZ128rm, 0 }, + { X86::VPMINSBZ256rr, X86::VPMINSBZ256rm, 0 }, + { X86::VPMINSDZ128rr, X86::VPMINSDZ128rm, 0 }, + { X86::VPMINSDZ256rr, X86::VPMINSDZ256rm, 0 }, + { X86::VPMINSQZ128rr, X86::VPMINSQZ128rm, 0 }, + { X86::VPMINSQZ256rr, X86::VPMINSQZ256rm, 0 }, + { X86::VPMINSWZ128rr, X86::VPMINSWZ128rm, 0 }, + { X86::VPMINSWZ256rr, X86::VPMINSWZ256rm, 0 }, + { X86::VPMINUBZ128rr, X86::VPMINUBZ128rm, 0 }, + { X86::VPMINUBZ256rr, X86::VPMINUBZ256rm, 0 }, + { X86::VPMINUDZ128rr, X86::VPMINUDZ128rm, 0 }, + { X86::VPMINUDZ256rr, X86::VPMINUDZ256rm, 0 }, + { X86::VPMINUQZ128rr, X86::VPMINUQZ128rm, 0 }, + { X86::VPMINUQZ256rr, X86::VPMINUQZ256rm, 0 }, + { X86::VPMINUWZ128rr, X86::VPMINUWZ128rm, 0 }, + { X86::VPMINUWZ256rr, X86::VPMINUWZ256rm, 0 }, + { X86::VPMULDQZ128rr, X86::VPMULDQZ128rm, 0 }, + { X86::VPMULDQZ256rr, X86::VPMULDQZ256rm, 0 }, + { X86::VPMULLDZ128rr, X86::VPMULLDZ128rm, 0 }, + { X86::VPMULLDZ256rr, X86::VPMULLDZ256rm, 0 }, + { X86::VPMULLQZ128rr, X86::VPMULLQZ128rm, 0 }, + { X86::VPMULLQZ256rr, X86::VPMULLQZ256rm, 0 }, + { X86::VPMULLWZ128rr, X86::VPMULLWZ128rm, 0 }, + { X86::VPMULLWZ256rr, X86::VPMULLWZ256rm, 0 }, + { X86::VPMULUDQZ128rr, X86::VPMULUDQZ128rm, 0 }, + { X86::VPMULUDQZ256rr, X86::VPMULUDQZ256rm, 0 }, { X86::VPORDZ128rr, X86::VPORDZ128rm, 0 }, { X86::VPORDZ256rr, X86::VPORDZ256rm, 0 }, { X86::VPORQZ128rr, X86::VPORQZ128rm, 0 }, { X86::VPORQZ256rr, X86::VPORQZ256rm, 0 }, + { X86::VPSADBWZ128rr, X86::VPSADBWZ128rm, 0 }, + { X86::VPSADBWZ256rr, X86::VPSADBWZ256rm, 0 }, { X86::VPSHUFBZ128rr, X86::VPSHUFBZ128rm, 0 }, { X86::VPSHUFBZ256rr, X86::VPSHUFBZ256rm, 0 }, + { X86::VPSLLDZ128rr, X86::VPSLLDZ128rm, 0 }, + { X86::VPSLLDZ256rr, X86::VPSLLDZ256rm, 0 }, + { X86::VPSLLQZ128rr, X86::VPSLLQZ128rm, 0 }, + { X86::VPSLLQZ256rr, X86::VPSLLQZ256rm, 0 }, + { X86::VPSLLVDZ128rr, X86::VPSLLVDZ128rm, 0 }, + { X86::VPSLLVDZ256rr, X86::VPSLLVDZ256rm, 0 }, + { 
X86::VPSLLVQZ128rr, X86::VPSLLVQZ128rm, 0 }, + { X86::VPSLLVQZ256rr, X86::VPSLLVQZ256rm, 0 }, + { X86::VPSLLVWZ128rr, X86::VPSLLVWZ128rm, 0 }, + { X86::VPSLLVWZ256rr, X86::VPSLLVWZ256rm, 0 }, + { X86::VPSLLWZ128rr, X86::VPSLLWZ128rm, 0 }, + { X86::VPSLLWZ256rr, X86::VPSLLWZ256rm, 0 }, + { X86::VPSRADZ128rr, X86::VPSRADZ128rm, 0 }, + { X86::VPSRADZ256rr, X86::VPSRADZ256rm, 0 }, + { X86::VPSRAQZ128rr, X86::VPSRAQZ128rm, 0 }, + { X86::VPSRAQZ256rr, X86::VPSRAQZ256rm, 0 }, + { X86::VPSRAVDZ128rr, X86::VPSRAVDZ128rm, 0 }, + { X86::VPSRAVDZ256rr, X86::VPSRAVDZ256rm, 0 }, + { X86::VPSRAVQZ128rr, X86::VPSRAVQZ128rm, 0 }, + { X86::VPSRAVQZ256rr, X86::VPSRAVQZ256rm, 0 }, + { X86::VPSRAVWZ128rr, X86::VPSRAVWZ128rm, 0 }, + { X86::VPSRAVWZ256rr, X86::VPSRAVWZ256rm, 0 }, + { X86::VPSRAWZ128rr, X86::VPSRAWZ128rm, 0 }, + { X86::VPSRAWZ256rr, X86::VPSRAWZ256rm, 0 }, + { X86::VPSRLDZ128rr, X86::VPSRLDZ128rm, 0 }, + { X86::VPSRLDZ256rr, X86::VPSRLDZ256rm, 0 }, + { X86::VPSRLQZ128rr, X86::VPSRLQZ128rm, 0 }, + { X86::VPSRLQZ256rr, X86::VPSRLQZ256rm, 0 }, + { X86::VPSRLVDZ128rr, X86::VPSRLVDZ128rm, 0 }, + { X86::VPSRLVDZ256rr, X86::VPSRLVDZ256rm, 0 }, + { X86::VPSRLVQZ128rr, X86::VPSRLVQZ128rm, 0 }, + { X86::VPSRLVQZ256rr, X86::VPSRLVQZ256rm, 0 }, + { X86::VPSRLVWZ128rr, X86::VPSRLVWZ128rm, 0 }, + { X86::VPSRLVWZ256rr, X86::VPSRLVWZ256rm, 0 }, + { X86::VPSRLWZ128rr, X86::VPSRLWZ128rm, 0 }, + { X86::VPSRLWZ256rr, X86::VPSRLWZ256rm, 0 }, { X86::VPSUBBZ128rr, X86::VPSUBBZ128rm, 0 }, { X86::VPSUBBZ256rr, X86::VPSUBBZ256rm, 0 }, { X86::VPSUBDZ128rr, X86::VPSUBDZ128rm, 0 }, @@ -2112,6 +2276,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPXORDZ256rr, X86::VPXORDZ256rm, 0 }, { X86::VPXORQZ128rr, X86::VPXORQZ128rm, 0 }, { X86::VPXORQZ256rr, X86::VPXORQZ256rm, 0 }, + { X86::VSHUFPDZ128rri, X86::VSHUFPDZ128rmi, 0 }, + { X86::VSHUFPDZ256rri, X86::VSHUFPDZ256rmi, 0 }, + { X86::VSHUFPSZ128rri, X86::VSHUFPSZ128rmi, 0 }, + { X86::VSHUFPSZ256rri, X86::VSHUFPSZ256rmi, 0 }, { X86::VSUBPDZ128rr, X86::VSUBPDZ128rm, 0 }, { X86::VSUBPDZ256rr, X86::VSUBPDZ256rm, 0 }, { X86::VSUBPSZ128rr, X86::VSUBPSZ128rm, 0 }, @@ -2130,6 +2298,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VXORPSZ256rr, X86::VXORPSZ256rm, 0 }, // AVX-512 masked foldable instructions + { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE }, + { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE }, + { X86::VPABSBZrrkz, X86::VPABSBZrmkz, 0 }, + { X86::VPABSDZrrkz, X86::VPABSDZrmkz, 0 }, + { X86::VPABSQZrrkz, X86::VPABSQZrmkz, 0 }, + { X86::VPABSWZrrkz, X86::VPABSWZrmkz, 0 }, { X86::VPERMILPDZrikz, X86::VPERMILPDZmikz, 0 }, { X86::VPERMILPSZrikz, X86::VPERMILPSZmikz, 0 }, { X86::VPERMPDZrikz, X86::VPERMPDZmikz, 0 }, @@ -2149,8 +2323,23 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSHUFDZrikz, X86::VPSHUFDZmikz, 0 }, { X86::VPSHUFHWZrikz, X86::VPSHUFHWZmikz, 0 }, { X86::VPSHUFLWZrikz, X86::VPSHUFLWZmikz, 0 }, + { X86::VPSLLDZrikz, X86::VPSLLDZmikz, 0 }, + { X86::VPSLLQZrikz, X86::VPSLLQZmikz, 0 }, + { X86::VPSLLWZrikz, X86::VPSLLWZmikz, 0 }, + { X86::VPSRADZrikz, X86::VPSRADZmikz, 0 }, + { X86::VPSRAQZrikz, X86::VPSRAQZmikz, 0 }, + { X86::VPSRAWZrikz, X86::VPSRAWZmikz, 0 }, + { X86::VPSRLDZrikz, X86::VPSRLDZmikz, 0 }, + { X86::VPSRLQZrikz, X86::VPSRLQZmikz, 0 }, + { X86::VPSRLWZrikz, X86::VPSRLWZmikz, 0 }, // AVX-512VL 256-bit masked foldable instructions + { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE }, + { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE }, + { 
X86::VPABSBZ256rrkz, X86::VPABSBZ256rmkz, 0 }, + { X86::VPABSDZ256rrkz, X86::VPABSDZ256rmkz, 0 }, + { X86::VPABSQZ256rrkz, X86::VPABSQZ256rmkz, 0 }, + { X86::VPABSWZ256rrkz, X86::VPABSWZ256rmkz, 0 }, { X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz, 0 }, { X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz, 0 }, { X86::VPERMPDZ256rikz, X86::VPERMPDZ256mikz, 0 }, @@ -2170,8 +2359,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSHUFDZ256rikz, X86::VPSHUFDZ256mikz, 0 }, { X86::VPSHUFHWZ256rikz, X86::VPSHUFHWZ256mikz, 0 }, { X86::VPSHUFLWZ256rikz, X86::VPSHUFLWZ256mikz, 0 }, + { X86::VPSLLDZ256rikz, X86::VPSLLDZ256mikz, 0 }, + { X86::VPSLLQZ256rikz, X86::VPSLLQZ256mikz, 0 }, + { X86::VPSLLWZ256rikz, X86::VPSLLWZ256mikz, 0 }, + { X86::VPSRADZ256rikz, X86::VPSRADZ256mikz, 0 }, + { X86::VPSRAQZ256rikz, X86::VPSRAQZ256mikz, 0 }, + { X86::VPSRAWZ256rikz, X86::VPSRAWZ256mikz, 0 }, + { X86::VPSRLDZ256rikz, X86::VPSRLDZ256mikz, 0 }, + { X86::VPSRLQZ256rikz, X86::VPSRLQZ256mikz, 0 }, + { X86::VPSRLWZ256rikz, X86::VPSRLWZ256mikz, 0 }, // AVX-512VL 128-bit masked foldable instructions + { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE }, + { X86::VPABSBZ128rrkz, X86::VPABSBZ128rmkz, 0 }, + { X86::VPABSDZ128rrkz, X86::VPABSDZ128rmkz, 0 }, + { X86::VPABSQZ128rrkz, X86::VPABSQZ128rmkz, 0 }, + { X86::VPABSWZ128rrkz, X86::VPABSWZ128rmkz, 0 }, { X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz, 0 }, { X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz, 0 }, { X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, TB_NO_REVERSE }, @@ -2189,6 +2392,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSHUFDZ128rikz, X86::VPSHUFDZ128mikz, 0 }, { X86::VPSHUFHWZ128rikz, X86::VPSHUFHWZ128mikz, 0 }, { X86::VPSHUFLWZ128rikz, X86::VPSHUFLWZ128mikz, 0 }, + { X86::VPSLLDZ128rikz, X86::VPSLLDZ128mikz, 0 }, + { X86::VPSLLQZ128rikz, X86::VPSLLQZ128mikz, 0 }, + { X86::VPSLLWZ128rikz, X86::VPSLLWZ128mikz, 0 }, + { X86::VPSRADZ128rikz, X86::VPSRADZ128mikz, 0 }, + { X86::VPSRAQZ128rikz, X86::VPSRAQZ128mikz, 0 }, + { X86::VPSRAWZ128rikz, X86::VPSRAWZ128mikz, 0 }, + { X86::VPSRLDZ128rikz, X86::VPSRLDZ128mikz, 0 }, + { X86::VPSRLQZ128rikz, X86::VPSRLQZ128mikz, 0 }, + { X86::VPSRLWZ128rikz, X86::VPSRLWZ128mikz, 0 }, // AES foldable instructions { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 }, @@ -2262,23 +2474,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) // XOP foldable instructions { X86::VPCMOVrrr, X86::VPCMOVrrm, 0 }, - { X86::VPCMOVrrrY, X86::VPCMOVrrmY, 0 }, + { X86::VPCMOVYrrr, X86::VPCMOVYrrm, 0 }, { X86::VPERMIL2PDrr, X86::VPERMIL2PDrm, 0 }, - { X86::VPERMIL2PDrrY, X86::VPERMIL2PDrmY, 0 }, + { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYrm, 0 }, { X86::VPERMIL2PSrr, X86::VPERMIL2PSrm, 0 }, - { X86::VPERMIL2PSrrY, X86::VPERMIL2PSrmY, 0 }, + { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYrm, 0 }, { X86::VPPERMrrr, X86::VPPERMrrm, 0 }, // AVX-512 instructions with 3 source operands. 
- { X86::VBLENDMPDZrr, X86::VBLENDMPDZrm, 0 }, - { X86::VBLENDMPSZrr, X86::VBLENDMPSZrm, 0 }, - { X86::VPBLENDMDZrr, X86::VPBLENDMDZrm, 0 }, - { X86::VPBLENDMQZrr, X86::VPBLENDMQZrm, 0 }, - { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE }, - { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE }, - { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE }, - { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE }, - { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE }, { X86::VPERMI2Brr, X86::VPERMI2Brm, 0 }, { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 }, { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 }, @@ -2329,6 +2532,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) // AVX-512 masked instructions { X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 }, { X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 }, + { X86::VADDSDZrr_Intkz, X86::VADDSDZrm_Intkz, TB_NO_REVERSE }, + { X86::VADDSSZrr_Intkz, X86::VADDSSZrm_Intkz, TB_NO_REVERSE }, { X86::VALIGNDZrrikz, X86::VALIGNDZrmikz, 0 }, { X86::VALIGNQZrrikz, X86::VALIGNQZrmikz, 0 }, { X86::VANDNPDZrrkz, X86::VANDNPDZrmkz, 0 }, @@ -2337,6 +2542,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VANDPSZrrkz, X86::VANDPSZrmkz, 0 }, { X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 }, { X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 }, + { X86::VDIVSDZrr_Intkz, X86::VDIVSDZrm_Intkz, TB_NO_REVERSE }, + { X86::VDIVSSZrr_Intkz, X86::VDIVSSZrm_Intkz, TB_NO_REVERSE }, { X86::VINSERTF32x4Zrrkz, X86::VINSERTF32x4Zrmkz, 0 }, { X86::VINSERTF32x8Zrrkz, X86::VINSERTF32x8Zrmkz, 0 }, { X86::VINSERTF64x2Zrrkz, X86::VINSERTF64x2Zrmkz, 0 }, @@ -2349,14 +2556,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0 }, { X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 }, { X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 }, + { X86::VMAXSDZrr_Intkz, X86::VMAXSDZrm_Intkz, 0 }, + { X86::VMAXSSZrr_Intkz, X86::VMAXSSZrm_Intkz, 0 }, { X86::VMINCPDZrrkz, X86::VMINCPDZrmkz, 0 }, { X86::VMINCPSZrrkz, X86::VMINCPSZrmkz, 0 }, { X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 }, { X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 }, + { X86::VMINSDZrr_Intkz, X86::VMINSDZrm_Intkz, 0 }, + { X86::VMINSSZrr_Intkz, X86::VMINSSZrm_Intkz, 0 }, { X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 }, { X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 }, + { X86::VMULSDZrr_Intkz, X86::VMULSDZrm_Intkz, TB_NO_REVERSE }, + { X86::VMULSSZrr_Intkz, X86::VMULSSZrm_Intkz, TB_NO_REVERSE }, { X86::VORPDZrrkz, X86::VORPDZrmkz, 0 }, { X86::VORPSZrrkz, X86::VORPSZrmkz, 0 }, + { X86::VPACKSSDWZrrkz, X86::VPACKSSDWZrmkz, 0 }, + { X86::VPACKSSWBZrrkz, X86::VPACKSSWBZrmkz, 0 }, + { X86::VPACKUSDWZrrkz, X86::VPACKUSDWZrmkz, 0 }, + { X86::VPACKUSWBZrrkz, X86::VPACKUSWBZrmkz, 0 }, { X86::VPADDBZrrkz, X86::VPADDBZrmkz, 0 }, { X86::VPADDDZrrkz, X86::VPADDDZrmkz, 0 }, { X86::VPADDQZrrkz, X86::VPADDQZrmkz, 0 }, @@ -2370,6 +2587,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPANDNDZrrkz, X86::VPANDNDZrmkz, 0 }, { X86::VPANDNQZrrkz, X86::VPANDNQZrmkz, 0 }, { X86::VPANDQZrrkz, X86::VPANDQZrmkz, 0 }, + { X86::VPAVGBZrrkz, X86::VPAVGBZrmkz, 0 }, + { X86::VPAVGWZrrkz, X86::VPAVGWZrmkz, 0 }, { X86::VPERMBZrrkz, X86::VPERMBZrmkz, 0 }, { X86::VPERMDZrrkz, X86::VPERMDZrmkz, 0 }, { X86::VPERMILPDZrrkz, X86::VPERMILPDZrmkz, 0 }, @@ -2380,9 +2599,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPERMWZrrkz, X86::VPERMWZrmkz, 0 }, { X86::VPMADDUBSWZrrkz, X86::VPMADDUBSWZrmkz, 0 }, { X86::VPMADDWDZrrkz, X86::VPMADDWDZrmkz, 0 }, + { X86::VPMAXSBZrrkz, X86::VPMAXSBZrmkz, 0 }, + { X86::VPMAXSDZrrkz, X86::VPMAXSDZrmkz, 0 }, + 
{ X86::VPMAXSQZrrkz, X86::VPMAXSQZrmkz, 0 }, + { X86::VPMAXSWZrrkz, X86::VPMAXSWZrmkz, 0 }, + { X86::VPMAXUBZrrkz, X86::VPMAXUBZrmkz, 0 }, + { X86::VPMAXUDZrrkz, X86::VPMAXUDZrmkz, 0 }, + { X86::VPMAXUQZrrkz, X86::VPMAXUQZrmkz, 0 }, + { X86::VPMAXUWZrrkz, X86::VPMAXUWZrmkz, 0 }, + { X86::VPMINSBZrrkz, X86::VPMINSBZrmkz, 0 }, + { X86::VPMINSDZrrkz, X86::VPMINSDZrmkz, 0 }, + { X86::VPMINSQZrrkz, X86::VPMINSQZrmkz, 0 }, + { X86::VPMINSWZrrkz, X86::VPMINSWZrmkz, 0 }, + { X86::VPMINUBZrrkz, X86::VPMINUBZrmkz, 0 }, + { X86::VPMINUDZrrkz, X86::VPMINUDZrmkz, 0 }, + { X86::VPMINUQZrrkz, X86::VPMINUQZrmkz, 0 }, + { X86::VPMINUWZrrkz, X86::VPMINUWZrmkz, 0 }, + { X86::VPMULLDZrrkz, X86::VPMULLDZrmkz, 0 }, + { X86::VPMULLQZrrkz, X86::VPMULLQZrmkz, 0 }, + { X86::VPMULLWZrrkz, X86::VPMULLWZrmkz, 0 }, + { X86::VPMULDQZrrkz, X86::VPMULDQZrmkz, 0 }, + { X86::VPMULUDQZrrkz, X86::VPMULUDQZrmkz, 0 }, { X86::VPORDZrrkz, X86::VPORDZrmkz, 0 }, { X86::VPORQZrrkz, X86::VPORQZrmkz, 0 }, { X86::VPSHUFBZrrkz, X86::VPSHUFBZrmkz, 0 }, + { X86::VPSLLDZrrkz, X86::VPSLLDZrmkz, 0 }, + { X86::VPSLLQZrrkz, X86::VPSLLQZrmkz, 0 }, + { X86::VPSLLVDZrrkz, X86::VPSLLVDZrmkz, 0 }, + { X86::VPSLLVQZrrkz, X86::VPSLLVQZrmkz, 0 }, + { X86::VPSLLVWZrrkz, X86::VPSLLVWZrmkz, 0 }, + { X86::VPSLLWZrrkz, X86::VPSLLWZrmkz, 0 }, + { X86::VPSRADZrrkz, X86::VPSRADZrmkz, 0 }, + { X86::VPSRAQZrrkz, X86::VPSRAQZrmkz, 0 }, + { X86::VPSRAVDZrrkz, X86::VPSRAVDZrmkz, 0 }, + { X86::VPSRAVQZrrkz, X86::VPSRAVQZrmkz, 0 }, + { X86::VPSRAVWZrrkz, X86::VPSRAVWZrmkz, 0 }, + { X86::VPSRAWZrrkz, X86::VPSRAWZrmkz, 0 }, + { X86::VPSRLDZrrkz, X86::VPSRLDZrmkz, 0 }, + { X86::VPSRLQZrrkz, X86::VPSRLQZrmkz, 0 }, + { X86::VPSRLVDZrrkz, X86::VPSRLVDZrmkz, 0 }, + { X86::VPSRLVQZrrkz, X86::VPSRLVQZrmkz, 0 }, + { X86::VPSRLVWZrrkz, X86::VPSRLVWZrmkz, 0 }, + { X86::VPSRLWZrrkz, X86::VPSRLWZrmkz, 0 }, { X86::VPSUBBZrrkz, X86::VPSUBBZrmkz, 0 }, { X86::VPSUBDZrrkz, X86::VPSUBDZrmkz, 0 }, { X86::VPSUBQZrrkz, X86::VPSUBQZrmkz, 0 }, @@ -2401,8 +2659,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLWDZrrkz, X86::VPUNPCKLWDZrmkz, 0 }, { X86::VPXORDZrrkz, X86::VPXORDZrmkz, 0 }, { X86::VPXORQZrrkz, X86::VPXORQZrmkz, 0 }, + { X86::VSHUFPDZrrikz, X86::VSHUFPDZrmikz, 0 }, + { X86::VSHUFPSZrrikz, X86::VSHUFPSZrmikz, 0 }, { X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 }, { X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 }, + { X86::VSUBSDZrr_Intkz, X86::VSUBSDZrm_Intkz, TB_NO_REVERSE }, + { X86::VSUBSSZrr_Intkz, X86::VSUBSSZrm_Intkz, TB_NO_REVERSE }, { X86::VUNPCKHPDZrrkz, X86::VUNPCKHPDZrmkz, 0 }, { X86::VUNPCKHPSZrrkz, X86::VUNPCKHPSZrmkz, 0 }, { X86::VUNPCKLPDZrrkz, X86::VUNPCKLPDZrmkz, 0 }, @@ -2437,6 +2699,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 }, { X86::VORPDZ256rrkz, X86::VORPDZ256rmkz, 0 }, { X86::VORPSZ256rrkz, X86::VORPSZ256rmkz, 0 }, + { X86::VPACKSSDWZ256rrkz, X86::VPACKSSDWZ256rmkz, 0 }, + { X86::VPACKSSWBZ256rrkz, X86::VPACKSSWBZ256rmkz, 0 }, + { X86::VPACKUSDWZ256rrkz, X86::VPACKUSDWZ256rmkz, 0 }, + { X86::VPACKUSWBZ256rrkz, X86::VPACKUSWBZ256rmkz, 0 }, { X86::VPADDBZ256rrkz, X86::VPADDBZ256rmkz, 0 }, { X86::VPADDDZ256rrkz, X86::VPADDDZ256rmkz, 0 }, { X86::VPADDQZ256rrkz, X86::VPADDQZ256rmkz, 0 }, @@ -2450,6 +2716,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPANDNDZ256rrkz, X86::VPANDNDZ256rmkz, 0 }, { X86::VPANDNQZ256rrkz, X86::VPANDNQZ256rmkz, 0 }, { X86::VPANDQZ256rrkz, X86::VPANDQZ256rmkz, 0 }, + { X86::VPAVGBZ256rrkz, X86::VPAVGBZ256rmkz, 0 }, + { X86::VPAVGWZ256rrkz, X86::VPAVGWZ256rmkz, 0 }, { 
X86::VPERMBZ256rrkz, X86::VPERMBZ256rmkz, 0 }, { X86::VPERMDZ256rrkz, X86::VPERMDZ256rmkz, 0 }, { X86::VPERMILPDZ256rrkz, X86::VPERMILPDZ256rmkz, 0 }, @@ -2460,9 +2728,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPERMWZ256rrkz, X86::VPERMWZ256rmkz, 0 }, { X86::VPMADDUBSWZ256rrkz, X86::VPMADDUBSWZ256rmkz, 0 }, { X86::VPMADDWDZ256rrkz, X86::VPMADDWDZ256rmkz, 0 }, + { X86::VPMAXSBZ256rrkz, X86::VPMAXSBZ256rmkz, 0 }, + { X86::VPMAXSDZ256rrkz, X86::VPMAXSDZ256rmkz, 0 }, + { X86::VPMAXSQZ256rrkz, X86::VPMAXSQZ256rmkz, 0 }, + { X86::VPMAXSWZ256rrkz, X86::VPMAXSWZ256rmkz, 0 }, + { X86::VPMAXUBZ256rrkz, X86::VPMAXUBZ256rmkz, 0 }, + { X86::VPMAXUDZ256rrkz, X86::VPMAXUDZ256rmkz, 0 }, + { X86::VPMAXUQZ256rrkz, X86::VPMAXUQZ256rmkz, 0 }, + { X86::VPMAXUWZ256rrkz, X86::VPMAXUWZ256rmkz, 0 }, + { X86::VPMINSBZ256rrkz, X86::VPMINSBZ256rmkz, 0 }, + { X86::VPMINSDZ256rrkz, X86::VPMINSDZ256rmkz, 0 }, + { X86::VPMINSQZ256rrkz, X86::VPMINSQZ256rmkz, 0 }, + { X86::VPMINSWZ256rrkz, X86::VPMINSWZ256rmkz, 0 }, + { X86::VPMINUBZ256rrkz, X86::VPMINUBZ256rmkz, 0 }, + { X86::VPMINUDZ256rrkz, X86::VPMINUDZ256rmkz, 0 }, + { X86::VPMINUQZ256rrkz, X86::VPMINUQZ256rmkz, 0 }, + { X86::VPMINUWZ256rrkz, X86::VPMINUWZ256rmkz, 0 }, + { X86::VPMULDQZ256rrkz, X86::VPMULDQZ256rmkz, 0 }, + { X86::VPMULLDZ256rrkz, X86::VPMULLDZ256rmkz, 0 }, + { X86::VPMULLQZ256rrkz, X86::VPMULLQZ256rmkz, 0 }, + { X86::VPMULLWZ256rrkz, X86::VPMULLWZ256rmkz, 0 }, + { X86::VPMULUDQZ256rrkz, X86::VPMULUDQZ256rmkz, 0 }, { X86::VPORDZ256rrkz, X86::VPORDZ256rmkz, 0 }, { X86::VPORQZ256rrkz, X86::VPORQZ256rmkz, 0 }, { X86::VPSHUFBZ256rrkz, X86::VPSHUFBZ256rmkz, 0 }, + { X86::VPSLLDZ256rrkz, X86::VPSLLDZ256rmkz, 0 }, + { X86::VPSLLQZ256rrkz, X86::VPSLLQZ256rmkz, 0 }, + { X86::VPSLLVDZ256rrkz, X86::VPSLLVDZ256rmkz, 0 }, + { X86::VPSLLVQZ256rrkz, X86::VPSLLVQZ256rmkz, 0 }, + { X86::VPSLLVWZ256rrkz, X86::VPSLLVWZ256rmkz, 0 }, + { X86::VPSLLWZ256rrkz, X86::VPSLLWZ256rmkz, 0 }, + { X86::VPSRADZ256rrkz, X86::VPSRADZ256rmkz, 0 }, + { X86::VPSRAQZ256rrkz, X86::VPSRAQZ256rmkz, 0 }, + { X86::VPSRAVDZ256rrkz, X86::VPSRAVDZ256rmkz, 0 }, + { X86::VPSRAVQZ256rrkz, X86::VPSRAVQZ256rmkz, 0 }, + { X86::VPSRAVWZ256rrkz, X86::VPSRAVWZ256rmkz, 0 }, + { X86::VPSRAWZ256rrkz, X86::VPSRAWZ256rmkz, 0 }, + { X86::VPSRLDZ256rrkz, X86::VPSRLDZ256rmkz, 0 }, + { X86::VPSRLQZ256rrkz, X86::VPSRLQZ256rmkz, 0 }, + { X86::VPSRLVDZ256rrkz, X86::VPSRLVDZ256rmkz, 0 }, + { X86::VPSRLVQZ256rrkz, X86::VPSRLVQZ256rmkz, 0 }, + { X86::VPSRLVWZ256rrkz, X86::VPSRLVWZ256rmkz, 0 }, + { X86::VPSRLWZ256rrkz, X86::VPSRLWZ256rmkz, 0 }, { X86::VPSUBBZ256rrkz, X86::VPSUBBZ256rmkz, 0 }, { X86::VPSUBDZ256rrkz, X86::VPSUBDZ256rmkz, 0 }, { X86::VPSUBQZ256rrkz, X86::VPSUBQZ256rmkz, 0 }, @@ -2481,6 +2788,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLWDZ256rrkz, X86::VPUNPCKLWDZ256rmkz, 0 }, { X86::VPXORDZ256rrkz, X86::VPXORDZ256rmkz, 0 }, { X86::VPXORQZ256rrkz, X86::VPXORQZ256rmkz, 0 }, + { X86::VSHUFPDZ256rrikz, X86::VSHUFPDZ256rmikz, 0 }, + { X86::VSHUFPSZ256rrikz, X86::VSHUFPSZ256rmikz, 0 }, { X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 }, { X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 }, { X86::VUNPCKHPDZ256rrkz, X86::VUNPCKHPDZ256rmkz, 0 }, @@ -2513,6 +2822,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 }, { X86::VORPDZ128rrkz, X86::VORPDZ128rmkz, 0 }, { X86::VORPSZ128rrkz, X86::VORPSZ128rmkz, 0 }, + { X86::VPACKSSDWZ128rrkz, X86::VPACKSSDWZ128rmkz, 0 }, + { X86::VPACKSSWBZ128rrkz, X86::VPACKSSWBZ128rmkz, 0 }, + { 
X86::VPACKUSDWZ128rrkz, X86::VPACKUSDWZ128rmkz, 0 }, + { X86::VPACKUSWBZ128rrkz, X86::VPACKUSWBZ128rmkz, 0 }, { X86::VPADDBZ128rrkz, X86::VPADDBZ128rmkz, 0 }, { X86::VPADDDZ128rrkz, X86::VPADDDZ128rmkz, 0 }, { X86::VPADDQZ128rrkz, X86::VPADDQZ128rmkz, 0 }, @@ -2526,15 +2839,56 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPANDNDZ128rrkz, X86::VPANDNDZ128rmkz, 0 }, { X86::VPANDNQZ128rrkz, X86::VPANDNQZ128rmkz, 0 }, { X86::VPANDQZ128rrkz, X86::VPANDQZ128rmkz, 0 }, + { X86::VPAVGBZ128rrkz, X86::VPAVGBZ128rmkz, 0 }, + { X86::VPAVGWZ128rrkz, X86::VPAVGWZ128rmkz, 0 }, { X86::VPERMBZ128rrkz, X86::VPERMBZ128rmkz, 0 }, { X86::VPERMILPDZ128rrkz, X86::VPERMILPDZ128rmkz, 0 }, { X86::VPERMILPSZ128rrkz, X86::VPERMILPSZ128rmkz, 0 }, { X86::VPERMWZ128rrkz, X86::VPERMWZ128rmkz, 0 }, { X86::VPMADDUBSWZ128rrkz, X86::VPMADDUBSWZ128rmkz, 0 }, { X86::VPMADDWDZ128rrkz, X86::VPMADDWDZ128rmkz, 0 }, + { X86::VPMAXSBZ128rrkz, X86::VPMAXSBZ128rmkz, 0 }, + { X86::VPMAXSDZ128rrkz, X86::VPMAXSDZ128rmkz, 0 }, + { X86::VPMAXSQZ128rrkz, X86::VPMAXSQZ128rmkz, 0 }, + { X86::VPMAXSWZ128rrkz, X86::VPMAXSWZ128rmkz, 0 }, + { X86::VPMAXUBZ128rrkz, X86::VPMAXUBZ128rmkz, 0 }, + { X86::VPMAXUDZ128rrkz, X86::VPMAXUDZ128rmkz, 0 }, + { X86::VPMAXUQZ128rrkz, X86::VPMAXUQZ128rmkz, 0 }, + { X86::VPMAXUWZ128rrkz, X86::VPMAXUWZ128rmkz, 0 }, + { X86::VPMINSBZ128rrkz, X86::VPMINSBZ128rmkz, 0 }, + { X86::VPMINSDZ128rrkz, X86::VPMINSDZ128rmkz, 0 }, + { X86::VPMINSQZ128rrkz, X86::VPMINSQZ128rmkz, 0 }, + { X86::VPMINSWZ128rrkz, X86::VPMINSWZ128rmkz, 0 }, + { X86::VPMINUBZ128rrkz, X86::VPMINUBZ128rmkz, 0 }, + { X86::VPMINUDZ128rrkz, X86::VPMINUDZ128rmkz, 0 }, + { X86::VPMINUQZ128rrkz, X86::VPMINUQZ128rmkz, 0 }, + { X86::VPMINUWZ128rrkz, X86::VPMINUWZ128rmkz, 0 }, + { X86::VPMULDQZ128rrkz, X86::VPMULDQZ128rmkz, 0 }, + { X86::VPMULLDZ128rrkz, X86::VPMULLDZ128rmkz, 0 }, + { X86::VPMULLQZ128rrkz, X86::VPMULLQZ128rmkz, 0 }, + { X86::VPMULLWZ128rrkz, X86::VPMULLWZ128rmkz, 0 }, + { X86::VPMULUDQZ128rrkz, X86::VPMULUDQZ128rmkz, 0 }, { X86::VPORDZ128rrkz, X86::VPORDZ128rmkz, 0 }, { X86::VPORQZ128rrkz, X86::VPORQZ128rmkz, 0 }, { X86::VPSHUFBZ128rrkz, X86::VPSHUFBZ128rmkz, 0 }, + { X86::VPSLLDZ128rrkz, X86::VPSLLDZ128rmkz, 0 }, + { X86::VPSLLQZ128rrkz, X86::VPSLLQZ128rmkz, 0 }, + { X86::VPSLLVDZ128rrkz, X86::VPSLLVDZ128rmkz, 0 }, + { X86::VPSLLVQZ128rrkz, X86::VPSLLVQZ128rmkz, 0 }, + { X86::VPSLLVWZ128rrkz, X86::VPSLLVWZ128rmkz, 0 }, + { X86::VPSLLWZ128rrkz, X86::VPSLLWZ128rmkz, 0 }, + { X86::VPSRADZ128rrkz, X86::VPSRADZ128rmkz, 0 }, + { X86::VPSRAQZ128rrkz, X86::VPSRAQZ128rmkz, 0 }, + { X86::VPSRAVDZ128rrkz, X86::VPSRAVDZ128rmkz, 0 }, + { X86::VPSRAVQZ128rrkz, X86::VPSRAVQZ128rmkz, 0 }, + { X86::VPSRAVWZ128rrkz, X86::VPSRAVWZ128rmkz, 0 }, + { X86::VPSRAWZ128rrkz, X86::VPSRAWZ128rmkz, 0 }, + { X86::VPSRLDZ128rrkz, X86::VPSRLDZ128rmkz, 0 }, + { X86::VPSRLQZ128rrkz, X86::VPSRLQZ128rmkz, 0 }, + { X86::VPSRLVDZ128rrkz, X86::VPSRLVDZ128rmkz, 0 }, + { X86::VPSRLVQZ128rrkz, X86::VPSRLVQZ128rmkz, 0 }, + { X86::VPSRLVWZ128rrkz, X86::VPSRLVWZ128rmkz, 0 }, + { X86::VPSRLWZ128rrkz, X86::VPSRLWZ128rmkz, 0 }, { X86::VPSUBBZ128rrkz, X86::VPSUBBZ128rmkz, 0 }, { X86::VPSUBDZ128rrkz, X86::VPSUBDZ128rmkz, 0 }, { X86::VPSUBQZ128rrkz, X86::VPSUBQZ128rmkz, 0 }, @@ -2553,6 +2907,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLWDZ128rrkz, X86::VPUNPCKLWDZ128rmkz, 0 }, { X86::VPXORDZ128rrkz, X86::VPXORDZ128rmkz, 0 }, { X86::VPXORQZ128rrkz, X86::VPXORQZ128rmkz, 0 }, + { X86::VSHUFPDZ128rrikz, X86::VSHUFPDZ128rmikz, 0 }, + { X86::VSHUFPSZ128rrikz, 
X86::VSHUFPSZ128rmikz, 0 }, { X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 }, { X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 }, { X86::VUNPCKHPDZ128rrkz, X86::VUNPCKHPDZ128rmkz, 0 }, @@ -2563,6 +2919,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VXORPSZ128rrkz, X86::VXORPSZ128rmkz, 0 }, // AVX-512 masked foldable instructions + { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE }, + { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE }, + { X86::VPABSBZrrk, X86::VPABSBZrmk, 0 }, + { X86::VPABSDZrrk, X86::VPABSDZrmk, 0 }, + { X86::VPABSQZrrk, X86::VPABSQZrmk, 0 }, + { X86::VPABSWZrrk, X86::VPABSWZrmk, 0 }, { X86::VPERMILPDZrik, X86::VPERMILPDZmik, 0 }, { X86::VPERMILPSZrik, X86::VPERMILPSZmik, 0 }, { X86::VPERMPDZrik, X86::VPERMPDZmik, 0 }, @@ -2582,8 +2944,23 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSHUFDZrik, X86::VPSHUFDZmik, 0 }, { X86::VPSHUFHWZrik, X86::VPSHUFHWZmik, 0 }, { X86::VPSHUFLWZrik, X86::VPSHUFLWZmik, 0 }, + { X86::VPSLLDZrik, X86::VPSLLDZmik, 0 }, + { X86::VPSLLQZrik, X86::VPSLLQZmik, 0 }, + { X86::VPSLLWZrik, X86::VPSLLWZmik, 0 }, + { X86::VPSRADZrik, X86::VPSRADZmik, 0 }, + { X86::VPSRAQZrik, X86::VPSRAQZmik, 0 }, + { X86::VPSRAWZrik, X86::VPSRAWZmik, 0 }, + { X86::VPSRLDZrik, X86::VPSRLDZmik, 0 }, + { X86::VPSRLQZrik, X86::VPSRLQZmik, 0 }, + { X86::VPSRLWZrik, X86::VPSRLWZmik, 0 }, // AVX-512VL 256-bit masked foldable instructions + { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE }, + { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE }, + { X86::VPABSBZ256rrk, X86::VPABSBZ256rmk, 0 }, + { X86::VPABSDZ256rrk, X86::VPABSDZ256rmk, 0 }, + { X86::VPABSQZ256rrk, X86::VPABSQZ256rmk, 0 }, + { X86::VPABSWZ256rrk, X86::VPABSWZ256rmk, 0 }, { X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mik, 0 }, { X86::VPERMILPSZ256rik, X86::VPERMILPSZ256mik, 0 }, { X86::VPERMPDZ256rik, X86::VPERMPDZ256mik, 0 }, @@ -2603,8 +2980,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSHUFDZ256rik, X86::VPSHUFDZ256mik, 0 }, { X86::VPSHUFHWZ256rik, X86::VPSHUFHWZ256mik, 0 }, { X86::VPSHUFLWZ256rik, X86::VPSHUFLWZ256mik, 0 }, + { X86::VPSLLDZ256rik, X86::VPSLLDZ256mik, 0 }, + { X86::VPSLLQZ256rik, X86::VPSLLQZ256mik, 0 }, + { X86::VPSLLWZ256rik, X86::VPSLLWZ256mik, 0 }, + { X86::VPSRADZ256rik, X86::VPSRADZ256mik, 0 }, + { X86::VPSRAQZ256rik, X86::VPSRAQZ256mik, 0 }, + { X86::VPSRAWZ256rik, X86::VPSRAWZ256mik, 0 }, + { X86::VPSRLDZ256rik, X86::VPSRLDZ256mik, 0 }, + { X86::VPSRLQZ256rik, X86::VPSRLQZ256mik, 0 }, + { X86::VPSRLWZ256rik, X86::VPSRLWZ256mik, 0 }, // AVX-512VL 128-bit masked foldable instructions + { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE }, + { X86::VPABSBZ128rrk, X86::VPABSBZ128rmk, 0 }, + { X86::VPABSDZ128rrk, X86::VPABSDZ128rmk, 0 }, + { X86::VPABSQZ128rrk, X86::VPABSQZ128rmk, 0 }, + { X86::VPABSWZ128rrk, X86::VPABSWZ128rmk, 0 }, { X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mik, 0 }, { X86::VPERMILPSZ128rik, X86::VPERMILPSZ128mik, 0 }, { X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, TB_NO_REVERSE }, @@ -2622,6 +3013,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSHUFDZ128rik, X86::VPSHUFDZ128mik, 0 }, { X86::VPSHUFHWZ128rik, X86::VPSHUFHWZ128mik, 0 }, { X86::VPSHUFLWZ128rik, X86::VPSHUFLWZ128mik, 0 }, + { X86::VPSLLDZ128rik, X86::VPSLLDZ128mik, 0 }, + { X86::VPSLLQZ128rik, X86::VPSLLQZ128mik, 0 }, + { X86::VPSLLWZ128rik, X86::VPSLLWZ128mik, 0 }, + { X86::VPSRADZ128rik, X86::VPSRADZ128mik, 0 }, + { X86::VPSRAQZ128rik, X86::VPSRAQZ128mik, 0 }, + { X86::VPSRAWZ128rik, 
X86::VPSRAWZ128mik, 0 }, + { X86::VPSRLDZ128rik, X86::VPSRLDZ128mik, 0 }, + { X86::VPSRLQZ128rik, X86::VPSRLQZ128mik, 0 }, + { X86::VPSRLWZ128rik, X86::VPSRLWZ128mik, 0 }, }; for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) { @@ -2651,6 +3051,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) // AVX-512 foldable masked instructions { X86::VADDPDZrrk, X86::VADDPDZrmk, 0 }, { X86::VADDPSZrrk, X86::VADDPSZrmk, 0 }, + { X86::VADDSDZrr_Intk, X86::VADDSDZrm_Intk, TB_NO_REVERSE }, + { X86::VADDSSZrr_Intk, X86::VADDSSZrm_Intk, TB_NO_REVERSE }, { X86::VALIGNDZrrik, X86::VALIGNDZrmik, 0 }, { X86::VALIGNQZrrik, X86::VALIGNQZrmik, 0 }, { X86::VANDNPDZrrk, X86::VANDNPDZrmk, 0 }, @@ -2659,6 +3061,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VANDPSZrrk, X86::VANDPSZrmk, 0 }, { X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 }, { X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 }, + { X86::VDIVSDZrr_Intk, X86::VDIVSDZrm_Intk, TB_NO_REVERSE }, + { X86::VDIVSSZrr_Intk, X86::VDIVSSZrm_Intk, TB_NO_REVERSE }, { X86::VINSERTF32x4Zrrk, X86::VINSERTF32x4Zrmk, 0 }, { X86::VINSERTF32x8Zrrk, X86::VINSERTF32x8Zrmk, 0 }, { X86::VINSERTF64x2Zrrk, X86::VINSERTF64x2Zrmk, 0 }, @@ -2671,14 +3075,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0 }, { X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 }, { X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 }, + { X86::VMAXSDZrr_Intk, X86::VMAXSDZrm_Intk, 0 }, + { X86::VMAXSSZrr_Intk, X86::VMAXSSZrm_Intk, 0 }, { X86::VMINCPDZrrk, X86::VMINCPDZrmk, 0 }, { X86::VMINCPSZrrk, X86::VMINCPSZrmk, 0 }, { X86::VMINPDZrrk, X86::VMINPDZrmk, 0 }, { X86::VMINPSZrrk, X86::VMINPSZrmk, 0 }, + { X86::VMINSDZrr_Intk, X86::VMINSDZrm_Intk, 0 }, + { X86::VMINSSZrr_Intk, X86::VMINSSZrm_Intk, 0 }, { X86::VMULPDZrrk, X86::VMULPDZrmk, 0 }, { X86::VMULPSZrrk, X86::VMULPSZrmk, 0 }, + { X86::VMULSDZrr_Intk, X86::VMULSDZrm_Intk, TB_NO_REVERSE }, + { X86::VMULSSZrr_Intk, X86::VMULSSZrm_Intk, TB_NO_REVERSE }, { X86::VORPDZrrk, X86::VORPDZrmk, 0 }, { X86::VORPSZrrk, X86::VORPSZrmk, 0 }, + { X86::VPACKSSDWZrrk, X86::VPACKSSDWZrmk, 0 }, + { X86::VPACKSSWBZrrk, X86::VPACKSSWBZrmk, 0 }, + { X86::VPACKUSDWZrrk, X86::VPACKUSDWZrmk, 0 }, + { X86::VPACKUSWBZrrk, X86::VPACKUSWBZrmk, 0 }, { X86::VPADDBZrrk, X86::VPADDBZrmk, 0 }, { X86::VPADDDZrrk, X86::VPADDDZrmk, 0 }, { X86::VPADDQZrrk, X86::VPADDQZrmk, 0 }, @@ -2692,6 +3106,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPANDNDZrrk, X86::VPANDNDZrmk, 0 }, { X86::VPANDNQZrrk, X86::VPANDNQZrmk, 0 }, { X86::VPANDQZrrk, X86::VPANDQZrmk, 0 }, + { X86::VPAVGBZrrk, X86::VPAVGBZrmk, 0 }, + { X86::VPAVGWZrrk, X86::VPAVGWZrmk, 0 }, { X86::VPERMBZrrk, X86::VPERMBZrmk, 0 }, { X86::VPERMDZrrk, X86::VPERMDZrmk, 0 }, { X86::VPERMI2Brrk, X86::VPERMI2Brmk, 0 }, @@ -2714,9 +3130,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPERMWZrrk, X86::VPERMWZrmk, 0 }, { X86::VPMADDUBSWZrrk, X86::VPMADDUBSWZrmk, 0 }, { X86::VPMADDWDZrrk, X86::VPMADDWDZrmk, 0 }, + { X86::VPMAXSBZrrk, X86::VPMAXSBZrmk, 0 }, + { X86::VPMAXSDZrrk, X86::VPMAXSDZrmk, 0 }, + { X86::VPMAXSQZrrk, X86::VPMAXSQZrmk, 0 }, + { X86::VPMAXSWZrrk, X86::VPMAXSWZrmk, 0 }, + { X86::VPMAXUBZrrk, X86::VPMAXUBZrmk, 0 }, + { X86::VPMAXUDZrrk, X86::VPMAXUDZrmk, 0 }, + { X86::VPMAXUQZrrk, X86::VPMAXUQZrmk, 0 }, + { X86::VPMAXUWZrrk, X86::VPMAXUWZrmk, 0 }, + { X86::VPMINSBZrrk, X86::VPMINSBZrmk, 0 }, + { X86::VPMINSDZrrk, X86::VPMINSDZrmk, 0 }, + { X86::VPMINSQZrrk, X86::VPMINSQZrmk, 0 }, + { X86::VPMINSWZrrk, X86::VPMINSWZrmk, 0 }, + { X86::VPMINUBZrrk, X86::VPMINUBZrmk, 0 }, + { X86::VPMINUDZrrk, 
X86::VPMINUDZrmk, 0 }, + { X86::VPMINUQZrrk, X86::VPMINUQZrmk, 0 }, + { X86::VPMINUWZrrk, X86::VPMINUWZrmk, 0 }, + { X86::VPMULDQZrrk, X86::VPMULDQZrmk, 0 }, + { X86::VPMULLDZrrk, X86::VPMULLDZrmk, 0 }, + { X86::VPMULLQZrrk, X86::VPMULLQZrmk, 0 }, + { X86::VPMULLWZrrk, X86::VPMULLWZrmk, 0 }, + { X86::VPMULUDQZrrk, X86::VPMULUDQZrmk, 0 }, { X86::VPORDZrrk, X86::VPORDZrmk, 0 }, { X86::VPORQZrrk, X86::VPORQZrmk, 0 }, { X86::VPSHUFBZrrk, X86::VPSHUFBZrmk, 0 }, + { X86::VPSLLDZrrk, X86::VPSLLDZrmk, 0 }, + { X86::VPSLLQZrrk, X86::VPSLLQZrmk, 0 }, + { X86::VPSLLVDZrrk, X86::VPSLLVDZrmk, 0 }, + { X86::VPSLLVQZrrk, X86::VPSLLVQZrmk, 0 }, + { X86::VPSLLVWZrrk, X86::VPSLLVWZrmk, 0 }, + { X86::VPSLLWZrrk, X86::VPSLLWZrmk, 0 }, + { X86::VPSRADZrrk, X86::VPSRADZrmk, 0 }, + { X86::VPSRAQZrrk, X86::VPSRAQZrmk, 0 }, + { X86::VPSRAVDZrrk, X86::VPSRAVDZrmk, 0 }, + { X86::VPSRAVQZrrk, X86::VPSRAVQZrmk, 0 }, + { X86::VPSRAVWZrrk, X86::VPSRAVWZrmk, 0 }, + { X86::VPSRAWZrrk, X86::VPSRAWZrmk, 0 }, + { X86::VPSRLDZrrk, X86::VPSRLDZrmk, 0 }, + { X86::VPSRLQZrrk, X86::VPSRLQZrmk, 0 }, + { X86::VPSRLVDZrrk, X86::VPSRLVDZrmk, 0 }, + { X86::VPSRLVQZrrk, X86::VPSRLVQZrmk, 0 }, + { X86::VPSRLVWZrrk, X86::VPSRLVWZrmk, 0 }, + { X86::VPSRLWZrrk, X86::VPSRLWZrmk, 0 }, { X86::VPSUBBZrrk, X86::VPSUBBZrmk, 0 }, { X86::VPSUBDZrrk, X86::VPSUBDZrmk, 0 }, { X86::VPSUBQZrrk, X86::VPSUBQZrmk, 0 }, @@ -2736,8 +3191,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLWDZrrk, X86::VPUNPCKLWDZrmk, 0 }, { X86::VPXORDZrrk, X86::VPXORDZrmk, 0 }, { X86::VPXORQZrrk, X86::VPXORQZrmk, 0 }, + { X86::VSHUFPDZrrik, X86::VSHUFPDZrmik, 0 }, + { X86::VSHUFPSZrrik, X86::VSHUFPSZrmik, 0 }, { X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 }, { X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 }, + { X86::VSUBSDZrr_Intk, X86::VSUBSDZrm_Intk, TB_NO_REVERSE }, + { X86::VSUBSSZrr_Intk, X86::VSUBSSZrm_Intk, TB_NO_REVERSE }, { X86::VUNPCKHPDZrrk, X86::VUNPCKHPDZrmk, 0 }, { X86::VUNPCKHPSZrrk, X86::VUNPCKHPSZrmk, 0 }, { X86::VUNPCKLPDZrrk, X86::VUNPCKLPDZrmk, 0 }, @@ -2772,6 +3231,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 }, { X86::VORPDZ256rrk, X86::VORPDZ256rmk, 0 }, { X86::VORPSZ256rrk, X86::VORPSZ256rmk, 0 }, + { X86::VPACKSSDWZ256rrk, X86::VPACKSSDWZ256rmk, 0 }, + { X86::VPACKSSWBZ256rrk, X86::VPACKSSWBZ256rmk, 0 }, + { X86::VPACKUSDWZ256rrk, X86::VPACKUSDWZ256rmk, 0 }, + { X86::VPACKUSWBZ256rrk, X86::VPACKUSWBZ256rmk, 0 }, { X86::VPADDBZ256rrk, X86::VPADDBZ256rmk, 0 }, { X86::VPADDDZ256rrk, X86::VPADDDZ256rmk, 0 }, { X86::VPADDQZ256rrk, X86::VPADDQZ256rmk, 0 }, @@ -2785,6 +3248,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPANDNDZ256rrk, X86::VPANDNDZ256rmk, 0 }, { X86::VPANDNQZ256rrk, X86::VPANDNQZ256rmk, 0 }, { X86::VPANDQZ256rrk, X86::VPANDQZ256rmk, 0 }, + { X86::VPAVGBZ256rrk, X86::VPAVGBZ256rmk, 0 }, + { X86::VPAVGWZ256rrk, X86::VPAVGWZ256rmk, 0 }, { X86::VPERMBZ256rrk, X86::VPERMBZ256rmk, 0 }, { X86::VPERMDZ256rrk, X86::VPERMDZ256rmk, 0 }, { X86::VPERMI2B256rrk, X86::VPERMI2B256rmk, 0 }, @@ -2807,9 +3272,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPERMWZ256rrk, X86::VPERMWZ256rmk, 0 }, { X86::VPMADDUBSWZ256rrk, X86::VPMADDUBSWZ256rmk, 0 }, { X86::VPMADDWDZ256rrk, X86::VPMADDWDZ256rmk, 0 }, + { X86::VPMAXSBZ256rrk, X86::VPMAXSBZ256rmk, 0 }, + { X86::VPMAXSDZ256rrk, X86::VPMAXSDZ256rmk, 0 }, + { X86::VPMAXSQZ256rrk, X86::VPMAXSQZ256rmk, 0 }, + { X86::VPMAXSWZ256rrk, X86::VPMAXSWZ256rmk, 0 }, + { X86::VPMAXUBZ256rrk, X86::VPMAXUBZ256rmk, 0 }, + { X86::VPMAXUDZ256rrk, 
X86::VPMAXUDZ256rmk, 0 }, + { X86::VPMAXUQZ256rrk, X86::VPMAXUQZ256rmk, 0 }, + { X86::VPMAXUWZ256rrk, X86::VPMAXUWZ256rmk, 0 }, + { X86::VPMINSBZ256rrk, X86::VPMINSBZ256rmk, 0 }, + { X86::VPMINSDZ256rrk, X86::VPMINSDZ256rmk, 0 }, + { X86::VPMINSQZ256rrk, X86::VPMINSQZ256rmk, 0 }, + { X86::VPMINSWZ256rrk, X86::VPMINSWZ256rmk, 0 }, + { X86::VPMINUBZ256rrk, X86::VPMINUBZ256rmk, 0 }, + { X86::VPMINUDZ256rrk, X86::VPMINUDZ256rmk, 0 }, + { X86::VPMINUQZ256rrk, X86::VPMINUQZ256rmk, 0 }, + { X86::VPMINUWZ256rrk, X86::VPMINUWZ256rmk, 0 }, + { X86::VPMULDQZ256rrk, X86::VPMULDQZ256rmk, 0 }, + { X86::VPMULLDZ256rrk, X86::VPMULLDZ256rmk, 0 }, + { X86::VPMULLQZ256rrk, X86::VPMULLQZ256rmk, 0 }, + { X86::VPMULLWZ256rrk, X86::VPMULLWZ256rmk, 0 }, + { X86::VPMULUDQZ256rrk, X86::VPMULUDQZ256rmk, 0 }, { X86::VPORDZ256rrk, X86::VPORDZ256rmk, 0 }, { X86::VPORQZ256rrk, X86::VPORQZ256rmk, 0 }, { X86::VPSHUFBZ256rrk, X86::VPSHUFBZ256rmk, 0 }, + { X86::VPSLLDZ256rrk, X86::VPSLLDZ256rmk, 0 }, + { X86::VPSLLQZ256rrk, X86::VPSLLQZ256rmk, 0 }, + { X86::VPSLLVDZ256rrk, X86::VPSLLVDZ256rmk, 0 }, + { X86::VPSLLVQZ256rrk, X86::VPSLLVQZ256rmk, 0 }, + { X86::VPSLLVWZ256rrk, X86::VPSLLVWZ256rmk, 0 }, + { X86::VPSLLWZ256rrk, X86::VPSLLWZ256rmk, 0 }, + { X86::VPSRADZ256rrk, X86::VPSRADZ256rmk, 0 }, + { X86::VPSRAQZ256rrk, X86::VPSRAQZ256rmk, 0 }, + { X86::VPSRAVDZ256rrk, X86::VPSRAVDZ256rmk, 0 }, + { X86::VPSRAVQZ256rrk, X86::VPSRAVQZ256rmk, 0 }, + { X86::VPSRAVWZ256rrk, X86::VPSRAVWZ256rmk, 0 }, + { X86::VPSRAWZ256rrk, X86::VPSRAWZ256rmk, 0 }, + { X86::VPSRLDZ256rrk, X86::VPSRLDZ256rmk, 0 }, + { X86::VPSRLQZ256rrk, X86::VPSRLQZ256rmk, 0 }, + { X86::VPSRLVDZ256rrk, X86::VPSRLVDZ256rmk, 0 }, + { X86::VPSRLVQZ256rrk, X86::VPSRLVQZ256rmk, 0 }, + { X86::VPSRLVWZ256rrk, X86::VPSRLVWZ256rmk, 0 }, + { X86::VPSRLWZ256rrk, X86::VPSRLWZ256rmk, 0 }, { X86::VPSUBBZ256rrk, X86::VPSUBBZ256rmk, 0 }, { X86::VPSUBDZ256rrk, X86::VPSUBDZ256rmk, 0 }, { X86::VPSUBQZ256rrk, X86::VPSUBQZ256rmk, 0 }, @@ -2830,6 +3334,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLWDZ256rrk, X86::VPUNPCKLWDZ256rmk, 0 }, { X86::VPXORDZ256rrk, X86::VPXORDZ256rmk, 0 }, { X86::VPXORQZ256rrk, X86::VPXORQZ256rmk, 0 }, + { X86::VSHUFPDZ256rrik, X86::VSHUFPDZ256rmik, 0 }, + { X86::VSHUFPSZ256rrik, X86::VSHUFPSZ256rmik, 0 }, { X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 }, { X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 }, { X86::VUNPCKHPDZ256rrk, X86::VUNPCKHPDZ256rmk, 0 }, @@ -2862,6 +3368,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 }, { X86::VORPDZ128rrk, X86::VORPDZ128rmk, 0 }, { X86::VORPSZ128rrk, X86::VORPSZ128rmk, 0 }, + { X86::VPACKSSDWZ128rrk, X86::VPACKSSDWZ128rmk, 0 }, + { X86::VPACKSSWBZ128rrk, X86::VPACKSSWBZ128rmk, 0 }, + { X86::VPACKUSDWZ128rrk, X86::VPACKUSDWZ128rmk, 0 }, + { X86::VPACKUSWBZ128rrk, X86::VPACKUSWBZ128rmk, 0 }, { X86::VPADDBZ128rrk, X86::VPADDBZ128rmk, 0 }, { X86::VPADDDZ128rrk, X86::VPADDDZ128rmk, 0 }, { X86::VPADDQZ128rrk, X86::VPADDQZ128rmk, 0 }, @@ -2875,6 +3385,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPANDNDZ128rrk, X86::VPANDNDZ128rmk, 0 }, { X86::VPANDNQZ128rrk, X86::VPANDNQZ128rmk, 0 }, { X86::VPANDQZ128rrk, X86::VPANDQZ128rmk, 0 }, + { X86::VPAVGBZ128rrk, X86::VPAVGBZ128rmk, 0 }, + { X86::VPAVGWZ128rrk, X86::VPAVGWZ128rmk, 0 }, { X86::VPERMBZ128rrk, X86::VPERMBZ128rmk, 0 }, { X86::VPERMI2B128rrk, X86::VPERMI2B128rmk, 0 }, { X86::VPERMI2D128rrk, X86::VPERMI2D128rmk, 0 }, @@ -2893,9 +3405,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { 
X86::VPERMWZ128rrk, X86::VPERMWZ128rmk, 0 }, { X86::VPMADDUBSWZ128rrk, X86::VPMADDUBSWZ128rmk, 0 }, { X86::VPMADDWDZ128rrk, X86::VPMADDWDZ128rmk, 0 }, + { X86::VPMAXSBZ128rrk, X86::VPMAXSBZ128rmk, 0 }, + { X86::VPMAXSDZ128rrk, X86::VPMAXSDZ128rmk, 0 }, + { X86::VPMAXSQZ128rrk, X86::VPMAXSQZ128rmk, 0 }, + { X86::VPMAXSWZ128rrk, X86::VPMAXSWZ128rmk, 0 }, + { X86::VPMAXUBZ128rrk, X86::VPMAXUBZ128rmk, 0 }, + { X86::VPMAXUDZ128rrk, X86::VPMAXUDZ128rmk, 0 }, + { X86::VPMAXUQZ128rrk, X86::VPMAXUQZ128rmk, 0 }, + { X86::VPMAXUWZ128rrk, X86::VPMAXUWZ128rmk, 0 }, + { X86::VPMINSBZ128rrk, X86::VPMINSBZ128rmk, 0 }, + { X86::VPMINSDZ128rrk, X86::VPMINSDZ128rmk, 0 }, + { X86::VPMINSQZ128rrk, X86::VPMINSQZ128rmk, 0 }, + { X86::VPMINSWZ128rrk, X86::VPMINSWZ128rmk, 0 }, + { X86::VPMINUBZ128rrk, X86::VPMINUBZ128rmk, 0 }, + { X86::VPMINUDZ128rrk, X86::VPMINUDZ128rmk, 0 }, + { X86::VPMINUQZ128rrk, X86::VPMINUQZ128rmk, 0 }, + { X86::VPMINUWZ128rrk, X86::VPMINUWZ128rmk, 0 }, + { X86::VPMULDQZ128rrk, X86::VPMULDQZ128rmk, 0 }, + { X86::VPMULLDZ128rrk, X86::VPMULLDZ128rmk, 0 }, + { X86::VPMULLQZ128rrk, X86::VPMULLQZ128rmk, 0 }, + { X86::VPMULLWZ128rrk, X86::VPMULLWZ128rmk, 0 }, + { X86::VPMULUDQZ128rrk, X86::VPMULUDQZ128rmk, 0 }, { X86::VPORDZ128rrk, X86::VPORDZ128rmk, 0 }, { X86::VPORQZ128rrk, X86::VPORQZ128rmk, 0 }, { X86::VPSHUFBZ128rrk, X86::VPSHUFBZ128rmk, 0 }, + { X86::VPSLLDZ128rrk, X86::VPSLLDZ128rmk, 0 }, + { X86::VPSLLQZ128rrk, X86::VPSLLQZ128rmk, 0 }, + { X86::VPSLLVDZ128rrk, X86::VPSLLVDZ128rmk, 0 }, + { X86::VPSLLVQZ128rrk, X86::VPSLLVQZ128rmk, 0 }, + { X86::VPSLLVWZ128rrk, X86::VPSLLVWZ128rmk, 0 }, + { X86::VPSLLWZ128rrk, X86::VPSLLWZ128rmk, 0 }, + { X86::VPSRADZ128rrk, X86::VPSRADZ128rmk, 0 }, + { X86::VPSRAQZ128rrk, X86::VPSRAQZ128rmk, 0 }, + { X86::VPSRAVDZ128rrk, X86::VPSRAVDZ128rmk, 0 }, + { X86::VPSRAVQZ128rrk, X86::VPSRAVQZ128rmk, 0 }, + { X86::VPSRAVWZ128rrk, X86::VPSRAVWZ128rmk, 0 }, + { X86::VPSRAWZ128rrk, X86::VPSRAWZ128rmk, 0 }, + { X86::VPSRLDZ128rrk, X86::VPSRLDZ128rmk, 0 }, + { X86::VPSRLQZ128rrk, X86::VPSRLQZ128rmk, 0 }, + { X86::VPSRLVDZ128rrk, X86::VPSRLVDZ128rmk, 0 }, + { X86::VPSRLVQZ128rrk, X86::VPSRLVQZ128rmk, 0 }, + { X86::VPSRLVWZ128rrk, X86::VPSRLVWZ128rmk, 0 }, + { X86::VPSRLWZ128rrk, X86::VPSRLWZ128rmk, 0 }, { X86::VPSUBBZ128rrk, X86::VPSUBBZ128rmk, 0 }, { X86::VPSUBDZ128rrk, X86::VPSUBDZ128rmk, 0 }, { X86::VPSUBQZ128rrk, X86::VPSUBQZ128rmk, 0 }, @@ -2916,6 +3467,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLWDZ128rrk, X86::VPUNPCKLWDZ128rmk, 0 }, { X86::VPXORDZ128rrk, X86::VPXORDZ128rmk, 0 }, { X86::VPXORQZ128rrk, X86::VPXORQZ128rmk, 0 }, + { X86::VSHUFPDZ128rrik, X86::VSHUFPDZ128rmik, 0 }, + { X86::VSHUFPSZ128rrik, X86::VSHUFPSZ128rmik, 0 }, { X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 }, { X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 }, { X86::VUNPCKHPDZ128rrk, X86::VUNPCKHPDZ128rmk, 0 }, @@ -3063,18 +3616,13 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const { const MachineFunction *MF = MI.getParent()->getParent(); const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); - if (MI.getOpcode() == getCallFrameSetupOpcode() || - MI.getOpcode() == getCallFrameDestroyOpcode()) { + if (isFrameInstr(MI)) { unsigned StackAlign = TFI->getStackAlignment(); - int SPAdj = - (MI.getOperand(0).getImm() + StackAlign - 1) / StackAlign * StackAlign; - - SPAdj -= MI.getOperand(1).getImm(); - - if (MI.getOpcode() == getCallFrameSetupOpcode()) - return SPAdj; - else - return -SPAdj; + int SPAdj = alignTo(getFrameSize(MI), StackAlign); + SPAdj -= 
getFrameAdjustment(MI); + if (!isFrameSetup(MI)) + SPAdj = -SPAdj; + return SPAdj; } // To know whether a call adjusts the stack, we need information @@ -3569,7 +4117,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, const DebugLoc &DL = Orig.getDebugLoc(); BuildMI(MBB, I, DL, get(X86::MOV32ri)) - .addOperand(Orig.getOperand(0)) + .add(Orig.getOperand(0)) .addImm(Value); } else { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig); @@ -3654,10 +4202,10 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, // Virtual register of the wrong class, we have to create a temporary 64-bit // vreg to feed into the LEA. NewSrc = MF.getRegInfo().createVirtualRegister(RC); - MachineInstr *Copy = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), - get(TargetOpcode::COPY)) - .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit) - .addOperand(Src); + MachineInstr *Copy = + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY)) + .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit) + .add(Src); // Which is obviously going to be dead after we're done with it. isKill = true; @@ -3823,10 +4371,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return nullptr; NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)) - .addOperand(Dest) + .add(Dest) .addReg(0) .addImm(1ULL << ShAmt) - .addOperand(Src) + .add(Src) .addImm(0) .addReg(0); break; @@ -3848,14 +4396,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)) - .addOperand(Dest) + .add(Dest) .addReg(0) .addImm(1ULL << ShAmt) .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) .addImm(0) .addReg(0); if (ImplicitOp.getReg() != 0) - MIB.addOperand(ImplicitOp); + MIB.add(ImplicitOp); NewMI = MIB; break; @@ -3869,10 +4417,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV) : nullptr; NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)) - .addOperand(Dest) + .add(Dest) .addReg(0) .addImm(1ULL << ShAmt) - .addOperand(Src) + .add(Src) .addImm(0) .addReg(0); break; @@ -3891,11 +4439,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)) - .addOperand(Dest) + .add(Dest) .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)); if (ImplicitOp.getReg() != 0) - MIB.addOperand(ImplicitOp); + MIB.add(ImplicitOp); NewMI = addOffset(MIB, 1); break; @@ -3905,10 +4453,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV) : nullptr; assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!"); - NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)) - .addOperand(Dest) - .addOperand(Src), - 1); + NewMI = addOffset( + BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src), 1); break; case X86::DEC64r: case X86::DEC32r: { @@ -3924,11 +4470,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return nullptr; MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)) - .addOperand(Dest) + .add(Dest) .addReg(SrcReg, getUndefRegState(isUndef) | getKillRegState(isKill)); if (ImplicitOp.getReg() != 0) - MIB.addOperand(ImplicitOp); + MIB.add(ImplicitOp); NewMI = addOffset(MIB, -1); @@ -3939,10 +4485,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV) : nullptr; assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!"); - NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)) - .addOperand(Dest) - .addOperand(Src), - -1); + NewMI = addOffset( + BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src), -1); break; case X86::ADD64rr: case X86::ADD64rr_DB: @@ -3970,12 +4514,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, SrcReg2, isKill2, isUndef2, ImplicitOp2, LV)) return nullptr; - MachineInstrBuilder MIB = - BuildMI(MF, MI.getDebugLoc(), get(Opc)).addOperand(Dest); + MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest); if (ImplicitOp.getReg() != 0) - MIB.addOperand(ImplicitOp); + MIB.add(ImplicitOp); if (ImplicitOp2.getReg() != 0) - MIB.addOperand(ImplicitOp2); + MIB.add(ImplicitOp2); NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2); @@ -3995,9 +4538,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Src2 = MI.getOperand(2).getReg(); bool isKill2 = MI.getOperand(2).isKill(); - NewMI = addRegReg( - BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).addOperand(Dest), - Src.getReg(), Src.isKill(), Src2, isKill2); + NewMI = addRegReg(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest), + Src.getReg(), Src.isKill(), Src2, isKill2); // Preserve undefness of the operands. bool isUndef = MI.getOperand(1).isUndef(); @@ -4014,10 +4556,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD64ri32_DB: case X86::ADD64ri8_DB: assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); - NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)) - .addOperand(Dest) - .addOperand(Src), - MI.getOperand(2)); + NewMI = addOffset( + BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src), + MI.getOperand(2)); break; case X86::ADD32ri: case X86::ADD32ri8: @@ -4034,11 +4575,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return nullptr; MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)) - .addOperand(Dest) + .add(Dest) .addReg(SrcReg, getUndefRegState(isUndef) | getKillRegState(isKill)); if (ImplicitOp.getReg() != 0) - MIB.addOperand(ImplicitOp); + MIB.add(ImplicitOp); NewMI = addOffset(MIB, MI.getOperand(2)); break; @@ -4051,12 +4592,136 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV) : nullptr; assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); - NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)) - .addOperand(Dest) - .addOperand(Src), - MI.getOperand(2)); + NewMI = addOffset( + BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src), + MI.getOperand(2)); + break; + + case X86::VMOVDQU8Z128rmk: + case X86::VMOVDQU8Z256rmk: + case X86::VMOVDQU8Zrmk: + case X86::VMOVDQU16Z128rmk: + case X86::VMOVDQU16Z256rmk: + case X86::VMOVDQU16Zrmk: + case X86::VMOVDQU32Z128rmk: case X86::VMOVDQA32Z128rmk: + case X86::VMOVDQU32Z256rmk: case X86::VMOVDQA32Z256rmk: + case X86::VMOVDQU32Zrmk: case X86::VMOVDQA32Zrmk: + case X86::VMOVDQU64Z128rmk: case X86::VMOVDQA64Z128rmk: + case X86::VMOVDQU64Z256rmk: case X86::VMOVDQA64Z256rmk: + case X86::VMOVDQU64Zrmk: case X86::VMOVDQA64Zrmk: + case X86::VMOVUPDZ128rmk: case X86::VMOVAPDZ128rmk: + case X86::VMOVUPDZ256rmk: case X86::VMOVAPDZ256rmk: + case X86::VMOVUPDZrmk: case X86::VMOVAPDZrmk: + case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk: + case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk: + case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk: { + unsigned Opc; + switch (MIOpc) { + default: llvm_unreachable("Unreachable!"); + case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break; + case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break; + case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break; + case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break; + case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break; + case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break; + case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break; + case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break; + case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break; + case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; + case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; + case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break; + case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; + case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; + case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break; + case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break; + case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; + case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break; + case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break; + case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break; + case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break; + case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; + case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; + case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break; + case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; + case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; + case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break; + case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break; + case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; + case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break; + } + + NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc)) + .add(Dest) + .add(MI.getOperand(2)) + .add(Src) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)) + .add(MI.getOperand(5)) + .add(MI.getOperand(6)) + .add(MI.getOperand(7)); break; } + case X86::VMOVDQU8Z128rrk: + case X86::VMOVDQU8Z256rrk: + case X86::VMOVDQU8Zrrk: + case X86::VMOVDQU16Z128rrk: + case X86::VMOVDQU16Z256rrk: 
+ case X86::VMOVDQU16Zrrk: + case X86::VMOVDQU32Z128rrk: case X86::VMOVDQA32Z128rrk: + case X86::VMOVDQU32Z256rrk: case X86::VMOVDQA32Z256rrk: + case X86::VMOVDQU32Zrrk: case X86::VMOVDQA32Zrrk: + case X86::VMOVDQU64Z128rrk: case X86::VMOVDQA64Z128rrk: + case X86::VMOVDQU64Z256rrk: case X86::VMOVDQA64Z256rrk: + case X86::VMOVDQU64Zrrk: case X86::VMOVDQA64Zrrk: + case X86::VMOVUPDZ128rrk: case X86::VMOVAPDZ128rrk: + case X86::VMOVUPDZ256rrk: case X86::VMOVAPDZ256rrk: + case X86::VMOVUPDZrrk: case X86::VMOVAPDZrrk: + case X86::VMOVUPSZ128rrk: case X86::VMOVAPSZ128rrk: + case X86::VMOVUPSZ256rrk: case X86::VMOVAPSZ256rrk: + case X86::VMOVUPSZrrk: case X86::VMOVAPSZrrk: { + unsigned Opc; + switch (MIOpc) { + default: llvm_unreachable("Unreachable!"); + case X86::VMOVDQU8Z128rrk: Opc = X86::VPBLENDMBZ128rrk; break; + case X86::VMOVDQU8Z256rrk: Opc = X86::VPBLENDMBZ256rrk; break; + case X86::VMOVDQU8Zrrk: Opc = X86::VPBLENDMBZrrk; break; + case X86::VMOVDQU16Z128rrk: Opc = X86::VPBLENDMWZ128rrk; break; + case X86::VMOVDQU16Z256rrk: Opc = X86::VPBLENDMWZ256rrk; break; + case X86::VMOVDQU16Zrrk: Opc = X86::VPBLENDMWZrrk; break; + case X86::VMOVDQU32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break; + case X86::VMOVDQU32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break; + case X86::VMOVDQU32Zrrk: Opc = X86::VPBLENDMDZrrk; break; + case X86::VMOVDQU64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break; + case X86::VMOVDQU64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break; + case X86::VMOVDQU64Zrrk: Opc = X86::VPBLENDMQZrrk; break; + case X86::VMOVUPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break; + case X86::VMOVUPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break; + case X86::VMOVUPDZrrk: Opc = X86::VBLENDMPDZrrk; break; + case X86::VMOVUPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break; + case X86::VMOVUPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break; + case X86::VMOVUPSZrrk: Opc = X86::VBLENDMPSZrrk; break; + case X86::VMOVDQA32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break; + case X86::VMOVDQA32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break; + case X86::VMOVDQA32Zrrk: Opc = X86::VPBLENDMDZrrk; break; + case X86::VMOVDQA64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break; + case X86::VMOVDQA64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break; + case X86::VMOVDQA64Zrrk: Opc = X86::VPBLENDMQZrrk; break; + case X86::VMOVAPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break; + case X86::VMOVAPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break; + case X86::VMOVAPDZrrk: Opc = X86::VBLENDMPDZrrk; break; + case X86::VMOVAPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break; + case X86::VMOVAPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break; + case X86::VMOVAPSZrrk: Opc = X86::VBLENDMPSZrrk; break; + } + + NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc)) + .add(Dest) + .add(MI.getOperand(2)) + .add(Src) + .add(MI.getOperand(3)); + break; + } + } if (!NewMI) return nullptr; @@ -4337,6 +5002,18 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); } + case X86::PFSUBrr: + case X86::PFSUBRrr: { + // PFSUB x, y: x = x - y + // PFSUBR x, y: x = y - x + unsigned Opc = + (X86::PFSUBRrr == MI.getOpcode() ? 
X86::PFSUBrr : X86::PFSUBRrr); + auto &WorkingMI = cloneIfNew(MI); + WorkingMI.setDesc(get(Opc)); + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + OpIdx1, OpIdx2); + break; + } case X86::BLENDPDrri: case X86::BLENDPSrri: case X86::PBLENDWrri: @@ -4606,18 +5283,30 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi: case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi: case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi: - case X86::VPTERNLOGDZrrik: case X86::VPTERNLOGDZrmik: - case X86::VPTERNLOGDZ128rrik: case X86::VPTERNLOGDZ128rmik: - case X86::VPTERNLOGDZ256rrik: case X86::VPTERNLOGDZ256rmik: - case X86::VPTERNLOGQZrrik: case X86::VPTERNLOGQZrmik: - case X86::VPTERNLOGQZ128rrik: case X86::VPTERNLOGQZ128rmik: - case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGQZ256rmik: + case X86::VPTERNLOGDZrrik: + case X86::VPTERNLOGDZ128rrik: + case X86::VPTERNLOGDZ256rrik: + case X86::VPTERNLOGQZrrik: + case X86::VPTERNLOGQZ128rrik: + case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz: case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz: case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz: case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz: case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz: - case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz: { + case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz: + case X86::VPTERNLOGDZ128rmbi: + case X86::VPTERNLOGDZ256rmbi: + case X86::VPTERNLOGDZrmbi: + case X86::VPTERNLOGQZ128rmbi: + case X86::VPTERNLOGQZ256rmbi: + case X86::VPTERNLOGQZrmbi: + case X86::VPTERNLOGDZ128rmbikz: + case X86::VPTERNLOGDZ256rmbikz: + case X86::VPTERNLOGDZrmbikz: + case X86::VPTERNLOGQZ128rmbikz: + case X86::VPTERNLOGQZ256rmbikz: + case X86::VPTERNLOGQZrmbikz: { auto &WorkingMI = cloneIfNew(MI); if (!commuteVPTERNLOG(WorkingMI, OpIdx1, OpIdx2)) return nullptr; @@ -4798,18 +5487,30 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi: case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi: case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi: - case X86::VPTERNLOGDZrrik: case X86::VPTERNLOGDZrmik: - case X86::VPTERNLOGDZ128rrik: case X86::VPTERNLOGDZ128rmik: - case X86::VPTERNLOGDZ256rrik: case X86::VPTERNLOGDZ256rmik: - case X86::VPTERNLOGQZrrik: case X86::VPTERNLOGQZrmik: - case X86::VPTERNLOGQZ128rrik: case X86::VPTERNLOGQZ128rmik: - case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGQZ256rmik: + case X86::VPTERNLOGDZrrik: + case X86::VPTERNLOGDZ128rrik: + case X86::VPTERNLOGDZ256rrik: + case X86::VPTERNLOGQZrrik: + case X86::VPTERNLOGQZ128rrik: + case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz: case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz: case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz: case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz: case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz: case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz: + case X86::VPTERNLOGDZ128rmbi: + case X86::VPTERNLOGDZ256rmbi: + case X86::VPTERNLOGDZrmbi: + case X86::VPTERNLOGQZ128rmbi: + case X86::VPTERNLOGQZ256rmbi: + case X86::VPTERNLOGQZrmbi: + case X86::VPTERNLOGDZ128rmbikz: + case X86::VPTERNLOGDZ256rmbikz: + case X86::VPTERNLOGDZrmbikz: + case X86::VPTERNLOGQZ128rmbikz: + case X86::VPTERNLOGQZ256rmbikz: + case 
X86::VPTERNLOGQZrmbikz: return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); default: const X86InstrFMA3Group *FMA3Group = @@ -5108,6 +5809,95 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { return !isPredicated(MI); } +bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + case X86::TCRETURNdi: + case X86::TCRETURNri: + case X86::TCRETURNmi: + case X86::TCRETURNdi64: + case X86::TCRETURNri64: + case X86::TCRETURNmi64: + return true; + default: + return false; + } +} + +bool X86InstrInfo::canMakeTailCallConditional( + SmallVectorImpl<MachineOperand> &BranchCond, + const MachineInstr &TailCall) const { + if (TailCall.getOpcode() != X86::TCRETURNdi && + TailCall.getOpcode() != X86::TCRETURNdi64) { + // Only direct calls can be done with a conditional branch. + return false; + } + + const MachineFunction *MF = TailCall.getParent()->getParent(); + if (Subtarget.isTargetWin64() && MF->hasWinCFI()) { + // Conditional tail calls confuse the Win64 unwinder. + return false; + } + + assert(BranchCond.size() == 1); + if (BranchCond[0].getImm() > X86::LAST_VALID_COND) { + // Can't make a conditional tail call with this condition. + return false; + } + + const X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); + if (X86FI->getTCReturnAddrDelta() != 0 || + TailCall.getOperand(1).getImm() != 0) { + // A conditional tail call cannot do any stack adjustment. + return false; + } + + return true; +} + +void X86InstrInfo::replaceBranchWithTailCall( + MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond, + const MachineInstr &TailCall) const { + assert(canMakeTailCallConditional(BranchCond, TailCall)); + + MachineBasicBlock::iterator I = MBB.end(); + while (I != MBB.begin()) { + --I; + if (I->isDebugValue()) + continue; + if (!I->isBranch()) + assert(0 && "Can't find the branch to replace!"); + + X86::CondCode CC = getCondFromBranchOpc(I->getOpcode()); + assert(BranchCond.size() == 1); + if (CC != BranchCond[0].getImm()) + continue; + + break; + } + + unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc + : X86::TCRETURNdi64cc; + + auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc)); + MIB->addOperand(TailCall.getOperand(0)); // Destination. + MIB.addImm(0); // Stack offset (not used). + MIB->addOperand(BranchCond[0]); // Condition. + MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters. + + // Add implicit uses and defs of all live regs potentially clobbered by the + // call. This way they still appear live across the call. + LivePhysRegs LiveRegs(&getRegisterInfo()); + LiveRegs.addLiveOuts(MBB); + SmallVector<std::pair<unsigned, const MachineOperand *>, 8> Clobbers; + LiveRegs.stepForward(*MIB, Clobbers); + for (const auto &C : Clobbers) { + MIB.addReg(C.first, RegState::Implicit); + MIB.addReg(C.first, RegState::Implicit | RegState::Define); + } + + I->eraseFromParent(); +} + // Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may // not be a fallthrough MBB now due to layout changes). Return nullptr if the // fallthrough MBB cannot be identified. @@ -5514,8 +6304,6 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg, // SrcReg(MaskReg) -> DestReg(GR64) // SrcReg(MaskReg) -> DestReg(GR32) - // SrcReg(MaskReg) -> DestReg(GR16) - // SrcReg(MaskReg) -> DestReg(GR8) // All KMASK RegClasses hold the same k registers, can be tested against anyone. 
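For readers skimming the new conditional tail-call hooks added above, the checks in canMakeTailCallConditional reduce to a small predicate. The sketch below is standalone and illustrative: TailCallInfo and its fields are stand-ins for the MachineInstr, subtarget and X86MachineFunctionInfo queries in the real code, and the values in main are arbitrary example data.

struct TailCallInfo {
  bool IsDirect;           // TCRETURNdi / TCRETURNdi64, not a reg/mem form
  bool Win64WithCFI;       // Win64 target in a function that emits WinCFI
  int CondCode;            // branch condition from BranchCond[0]
  int LastValidCond;       // largest condition code with a cc tail-call form
  int TCReturnAddrDelta;   // extra return-address adjustment requested
  int StackOffset;         // stack adjustment encoded on the tail call
};

bool canBeMadeConditional(const TailCallInfo &T) {
  if (!T.IsDirect)
    return false;                       // only direct calls have a cc form
  if (T.Win64WithCFI)
    return false;                       // would confuse the Win64 unwinder
  if (T.CondCode > T.LastValidCond)
    return false;                       // condition not encodable
  if (T.TCReturnAddrDelta != 0 || T.StackOffset != 0)
    return false;                       // no stack adjustment allowed
  return true;
}

int main() {
  TailCallInfo T{true, false, 4, 15, 0, 0};   // arbitrary example values
  return canBeMadeConditional(T) ? 0 : 1;
}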
if (X86::VK16RegClass.contains(SrcReg)) { @@ -5525,20 +6313,10 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg, } if (X86::GR32RegClass.contains(DestReg)) return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk; - if (X86::GR16RegClass.contains(DestReg)) { - DestReg = getX86SubSuperRegister(DestReg, 32); - return X86::KMOVWrk; - } - if (X86::GR8RegClass.contains(DestReg)) { - DestReg = getX86SubSuperRegister(DestReg, 32); - return Subtarget.hasDQI() ? X86::KMOVBrk : X86::KMOVWrk; - } } // SrcReg(GR64) -> DestReg(MaskReg) // SrcReg(GR32) -> DestReg(MaskReg) - // SrcReg(GR16) -> DestReg(MaskReg) - // SrcReg(GR8) -> DestReg(MaskReg) // All KMASK RegClasses hold the same k registers, can be tested against anyone. if (X86::VK16RegClass.contains(DestReg)) { @@ -5548,14 +6326,6 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg, } if (X86::GR32RegClass.contains(SrcReg)) return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr; - if (X86::GR16RegClass.contains(SrcReg)) { - SrcReg = getX86SubSuperRegister(SrcReg, 32); - return X86::KMOVWkr; - } - if (X86::GR8RegClass.contains(SrcReg)) { - SrcReg = getX86SubSuperRegister(SrcReg, 32); - return Subtarget.hasDQI() ? X86::KMOVBkr : X86::KMOVWkr; - } } @@ -5965,7 +6735,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.addOperand(Addr[i]); + MIB.add(Addr[i]); MIB.addReg(SrcReg, getKillRegState(isKill)); (*MIB).setMemRefs(MMOBegin, MMOEnd); NewMIs.push_back(MIB); @@ -6000,7 +6770,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.addOperand(Addr[i]); + MIB.add(Addr[i]); (*MIB).setMemRefs(MMOBegin, MMOEnd); NewMIs.push_back(MIB); } @@ -6017,12 +6787,14 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, case X86::CMP16ri: case X86::CMP16ri8: case X86::CMP8ri: - if (!MI.getOperand(1).isImm()) - return false; SrcReg = MI.getOperand(0).getReg(); SrcReg2 = 0; - CmpMask = ~0; - CmpValue = MI.getOperand(1).getImm(); + if (MI.getOperand(1).isImm()) { + CmpMask = ~0; + CmpValue = MI.getOperand(1).getImm(); + } else { + CmpMask = CmpValue = 0; + } return true; // A SUB can be used to perform comparison. 
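The comment above notes that a SUB can stand in for a CMP. A brief standalone illustration of why, using only the flag definitions of x86 subtraction: both compute Dst - Src and set EFLAGS the same way, SUB merely also writes the result, which is what lets isRedundantFlagInstr below treat a matching earlier SUB as making the CMP redundant. Only three of the flags are modelled here; the code is not LLVM API.

#include <cstdint>
#include <cstdio>

struct Flags { bool ZF, SF, CF; };

Flags flagsOfSub(uint32_t A, uint32_t B) {
  uint32_t R = A - B;                    // SUB writes R; CMP would discard it
  return {R == 0, (R >> 31) != 0, A < B};
}

int main() {
  Flags F = flagsOfSub(3, 7);
  std::printf("ZF=%d SF=%d CF=%d\n", F.ZF, F.SF, F.CF);   // ZF=0 SF=1 CF=1
}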
case X86::SUB64rm: @@ -6031,7 +6803,7 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, case X86::SUB8rm: SrcReg = MI.getOperand(1).getReg(); SrcReg2 = 0; - CmpMask = ~0; + CmpMask = 0; CmpValue = 0; return true; case X86::SUB64rr: @@ -6040,7 +6812,7 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, case X86::SUB8rr: SrcReg = MI.getOperand(1).getReg(); SrcReg2 = MI.getOperand(2).getReg(); - CmpMask = ~0; + CmpMask = 0; CmpValue = 0; return true; case X86::SUB64ri32: @@ -6050,12 +6822,14 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, case X86::SUB16ri: case X86::SUB16ri8: case X86::SUB8ri: - if (!MI.getOperand(2).isImm()) - return false; SrcReg = MI.getOperand(1).getReg(); SrcReg2 = 0; - CmpMask = ~0; - CmpValue = MI.getOperand(2).getImm(); + if (MI.getOperand(2).isImm()) { + CmpMask = ~0; + CmpValue = MI.getOperand(2).getImm(); + } else { + CmpMask = CmpValue = 0; + } return true; case X86::CMP64rr: case X86::CMP32rr: @@ -6063,7 +6837,7 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, case X86::CMP8rr: SrcReg = MI.getOperand(0).getReg(); SrcReg2 = MI.getOperand(1).getReg(); - CmpMask = ~0; + CmpMask = 0; CmpValue = 0; return true; case X86::TEST8rr: @@ -6089,8 +6863,8 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, /// SrcReg, SrcRegs: register operands for FlagI. /// ImmValue: immediate for FlagI if it takes an immediate. inline static bool isRedundantFlagInstr(MachineInstr &FlagI, unsigned SrcReg, - unsigned SrcReg2, int ImmValue, - MachineInstr &OI) { + unsigned SrcReg2, int ImmMask, + int ImmValue, MachineInstr &OI) { if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) || (FlagI.getOpcode() == X86::CMP32rr && OI.getOpcode() == X86::SUB32rr) || (FlagI.getOpcode() == X86::CMP16rr && OI.getOpcode() == X86::SUB16rr) || @@ -6101,7 +6875,8 @@ inline static bool isRedundantFlagInstr(MachineInstr &FlagI, unsigned SrcReg, OI.getOperand(2).getReg() == SrcReg))) return true; - if (((FlagI.getOpcode() == X86::CMP64ri32 && + if (ImmMask != 0 && + ((FlagI.getOpcode() == X86::CMP64ri32 && OI.getOpcode() == X86::SUB64ri32) || (FlagI.getOpcode() == X86::CMP64ri8 && OI.getOpcode() == X86::SUB64ri8) || @@ -6288,7 +7063,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, // If we are comparing against zero, check whether we can use MI to update // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize. - bool IsCmpZero = (SrcReg2 == 0 && CmpValue == 0); + bool IsCmpZero = (CmpMask != 0 && CmpValue == 0); if (IsCmpZero && MI->getParent() != CmpInstr.getParent()) return false; @@ -6338,8 +7113,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, for (; RI != RE; ++RI) { MachineInstr &Instr = *RI; // Check whether CmpInstr can be made redundant by the current instruction. 
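The analyzeCompare changes above switch to a convention where CmpMask doubles as an "immediate is valid" marker, so instructions with non-immediate operands can still be reported instead of rejected. A minimal standalone sketch of that convention, with illustrative names; the last helper mirrors the IsCmpZero test in optimizeCompareInstr.

struct CompareInfo {
  int CmpMask = 0;    // nonzero means "CmpValue holds a real immediate"
  int CmpValue = 0;
};

CompareInfo classifyImmediateCompare(bool OperandIsImm, int Imm) {
  CompareInfo CI;
  if (OperandIsImm) {
    CI.CmpMask = ~0;
    CI.CmpValue = Imm;
  }
  return CI;          // otherwise both stay 0: usable, but not an imm compare
}

bool isCompareAgainstZero(const CompareInfo &CI) {
  return CI.CmpMask != 0 && CI.CmpValue == 0;   // mirrors IsCmpZero above
}

int main() {
  return isCompareAgainstZero(classifyImmediateCompare(true, 0)) ? 0 : 1;
}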
- if (!IsCmpZero && - isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, Instr)) { + if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, + CmpValue, Instr)) { Sub = &Instr; break; } @@ -6764,14 +7539,33 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { assert(HasAVX && "AVX not supported"); return Expand2AddrUndef(MIB, get(X86::VXORPSYrr)); case X86::AVX512_128_SET0: - return Expand2AddrUndef(MIB, get(X86::VPXORDZ128rr)); - case X86::AVX512_256_SET0: - return Expand2AddrUndef(MIB, get(X86::VPXORDZ256rr)); + case X86::AVX512_FsFLD0SS: + case X86::AVX512_FsFLD0SD: { + bool HasVLX = Subtarget.hasVLX(); + unsigned SrcReg = MIB->getOperand(0).getReg(); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) + return Expand2AddrUndef(MIB, + get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr)); + // Extended register without VLX. Use a larger XOR. + SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass); + MIB->getOperand(0).setReg(SrcReg); + return Expand2AddrUndef(MIB, get(X86::VPXORDZrr)); + } + case X86::AVX512_256_SET0: { + bool HasVLX = Subtarget.hasVLX(); + unsigned SrcReg = MIB->getOperand(0).getReg(); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) + return Expand2AddrUndef(MIB, + get(HasVLX ? X86::VPXORDZ256rr : X86::VXORPSYrr)); + // Extended register without VLX. Use a larger XOR. + SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass); + MIB->getOperand(0).setReg(SrcReg); + return Expand2AddrUndef(MIB, get(X86::VPXORDZrr)); + } case X86::AVX512_512_SET0: return Expand2AddrUndef(MIB, get(X86::VPXORDZrr)); - case X86::AVX512_FsFLD0SS: - case X86::AVX512_FsFLD0SD: - return Expand2AddrUndef(MIB, get(X86::VXORPSZ128rr)); case X86::V_SETALLONES: return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr)); case X86::AVX2_SETALLONES: @@ -6838,11 +7632,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { // registers, since it is not usable as a write mask. // FIXME: A more advanced approach would be to choose the best input mask // register based on context. - case X86::KSET0B: case X86::KSET0W: return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0); case X86::KSET0D: return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0); case X86::KSET0Q: return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0); - case X86::KSET1B: case X86::KSET1W: return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0); case X86::KSET1D: return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0); case X86::KSET1Q: return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0); @@ -6860,7 +7652,7 @@ static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs, if (NumAddrOps < 4) { // FrameIndex only - add an immediate offset (whether its zero or not). 
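The expandPostRAPseudo change above picks the zeroing idiom for AVX512_128_SET0 (and the FsFLD0 pseudos) based on VLX and the register encoding. A standalone sketch of that decision follows; the returned strings are only labels for the three outcomes, not code the pass emits.

const char *pickZeroIdiom128(bool HasVLX, unsigned RegEncoding) {
  if (HasVLX)
    return "VPXORDZ128rr";     // EVEX 128-bit xor, legal for xmm0..xmm31
  if (RegEncoding < 16)
    return "VXORPSrr";         // VEX fallback, only reaches xmm0..xmm15
  // xmm16..xmm31 without VLX: no legal 128-bit encoding, so the pseudo is
  // rewritten to zero the whole zmm super-register instead.
  return "VPXORDZrr on the zmm super-register";
}

int main() {
  return pickZeroIdiom128(false, 17) != nullptr ? 0 : 1;
}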
for (unsigned i = 0; i != NumAddrOps; ++i) - MIB.addOperand(MOs[i]); + MIB.add(MOs[i]); addOffset(MIB, PtrOffset); } else { // General Memory Addressing - we need to add any offset to an existing @@ -6871,7 +7663,7 @@ static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs, if (i == 3 && PtrOffset != 0) { MIB.addDisp(MO, PtrOffset); } else { - MIB.addOperand(MO); + MIB.add(MO); } } } @@ -6893,11 +7685,11 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, unsigned NumOps = MI.getDesc().getNumOperands() - 2; for (unsigned i = 0; i != NumOps; ++i) { MachineOperand &MO = MI.getOperand(i + 2); - MIB.addOperand(MO); + MIB.add(MO); } for (unsigned i = NumOps + 2, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); - MIB.addOperand(MO); + MIB.add(MO); } MachineBasicBlock *MBB = InsertPt->getParent(); @@ -6922,7 +7714,7 @@ static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode, assert(MO.isReg() && "Expected to fold into reg operand!"); addOperands(MIB, MOs, PtrOffset); } else { - MIB.addOperand(MO); + MIB.add(MO); } } @@ -7226,7 +8018,7 @@ static bool hasPartialRegUpdate(unsigned Opcode) { return false; } -/// Inform the ExeDepsFix pass how many idle +/// Inform the ExecutionDepsFix pass how many idle /// instructions we would like before a partial register update. unsigned X86InstrInfo::getPartialRegUpdateClearance( const MachineInstr &MI, unsigned OpNum, @@ -7344,11 +8136,15 @@ static bool hasUndefRegUpdate(unsigned Opcode) { case X86::VCVTUSI642SDZrrb_Int: case X86::VCVTUSI642SDZrm_Int: case X86::VCVTSD2SSZrr: - case X86::VCVTSD2SSZrrb: + case X86::VCVTSD2SSZrr_Int: + case X86::VCVTSD2SSZrrb_Int: case X86::VCVTSD2SSZrm: + case X86::VCVTSD2SSZrm_Int: case X86::VCVTSS2SDZrr: - case X86::VCVTSS2SDZrrb: + case X86::VCVTSS2SDZrr_Int: + case X86::VCVTSS2SDZrrb_Int: case X86::VCVTSS2SDZrm: + case X86::VCVTSS2SDZrm_Int: case X86::VRNDSCALESDr: case X86::VRNDSCALESDrb: case X86::VRNDSCALESDm: @@ -7375,8 +8171,8 @@ static bool hasUndefRegUpdate(unsigned Opcode) { return false; } -/// Inform the ExeDepsFix pass how many idle instructions we would like before -/// certain undef register reads. +/// Inform the ExecutionDepsFix pass how many idle instructions we would like +/// before certain undef register reads. 
/// /// This catches the VCVTSI2SD family of instructions: /// @@ -7522,6 +8318,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::MINSSrr_Int: case X86::VMINSSrr_Int: case X86::VMINSSZrr_Int: case X86::MULSSrr_Int: case X86::VMULSSrr_Int: case X86::VMULSSZrr_Int: case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int: + case X86::VADDSSZrr_Intk: case X86::VADDSSZrr_Intkz: + case X86::VDIVSSZrr_Intk: case X86::VDIVSSZrr_Intkz: + case X86::VMAXSSZrr_Intk: case X86::VMAXSSZrr_Intkz: + case X86::VMINSSZrr_Intk: case X86::VMINSSZrr_Intkz: + case X86::VMULSSZrr_Intk: case X86::VMULSSZrr_Intkz: + case X86::VSUBSSZrr_Intk: case X86::VSUBSSZrr_Intkz: case X86::VFMADDSS4rr_Int: case X86::VFNMADDSS4rr_Int: case X86::VFMSUBSS4rr_Int: case X86::VFNMSUBSS4rr_Int: case X86::VFMADD132SSr_Int: case X86::VFNMADD132SSr_Int: @@ -7536,6 +8338,18 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::VFMSUB132SSZr_Int: case X86::VFNMSUB132SSZr_Int: case X86::VFMSUB213SSZr_Int: case X86::VFNMSUB213SSZr_Int: case X86::VFMSUB231SSZr_Int: case X86::VFNMSUB231SSZr_Int: + case X86::VFMADD132SSZr_Intk: case X86::VFNMADD132SSZr_Intk: + case X86::VFMADD213SSZr_Intk: case X86::VFNMADD213SSZr_Intk: + case X86::VFMADD231SSZr_Intk: case X86::VFNMADD231SSZr_Intk: + case X86::VFMSUB132SSZr_Intk: case X86::VFNMSUB132SSZr_Intk: + case X86::VFMSUB213SSZr_Intk: case X86::VFNMSUB213SSZr_Intk: + case X86::VFMSUB231SSZr_Intk: case X86::VFNMSUB231SSZr_Intk: + case X86::VFMADD132SSZr_Intkz: case X86::VFNMADD132SSZr_Intkz: + case X86::VFMADD213SSZr_Intkz: case X86::VFNMADD213SSZr_Intkz: + case X86::VFMADD231SSZr_Intkz: case X86::VFNMADD231SSZr_Intkz: + case X86::VFMSUB132SSZr_Intkz: case X86::VFNMSUB132SSZr_Intkz: + case X86::VFMSUB213SSZr_Intkz: case X86::VFNMSUB213SSZr_Intkz: + case X86::VFMSUB231SSZr_Intkz: case X86::VFNMSUB231SSZr_Intkz: return false; default: return true; @@ -7555,6 +8369,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::MINSDrr_Int: case X86::VMINSDrr_Int: case X86::VMINSDZrr_Int: case X86::MULSDrr_Int: case X86::VMULSDrr_Int: case X86::VMULSDZrr_Int: case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int: + case X86::VADDSDZrr_Intk: case X86::VADDSDZrr_Intkz: + case X86::VDIVSDZrr_Intk: case X86::VDIVSDZrr_Intkz: + case X86::VMAXSDZrr_Intk: case X86::VMAXSDZrr_Intkz: + case X86::VMINSDZrr_Intk: case X86::VMINSDZrr_Intkz: + case X86::VMULSDZrr_Intk: case X86::VMULSDZrr_Intkz: + case X86::VSUBSDZrr_Intk: case X86::VSUBSDZrr_Intkz: case X86::VFMADDSD4rr_Int: case X86::VFNMADDSD4rr_Int: case X86::VFMSUBSD4rr_Int: case X86::VFNMSUBSD4rr_Int: case X86::VFMADD132SDr_Int: case X86::VFNMADD132SDr_Int: @@ -7569,6 +8389,18 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::VFMSUB132SDZr_Int: case X86::VFNMSUB132SDZr_Int: case X86::VFMSUB213SDZr_Int: case X86::VFNMSUB213SDZr_Int: case X86::VFMSUB231SDZr_Int: case X86::VFNMSUB231SDZr_Int: + case X86::VFMADD132SDZr_Intk: case X86::VFNMADD132SDZr_Intk: + case X86::VFMADD213SDZr_Intk: case X86::VFNMADD213SDZr_Intk: + case X86::VFMADD231SDZr_Intk: case X86::VFNMADD231SDZr_Intk: + case X86::VFMSUB132SDZr_Intk: case X86::VFNMSUB132SDZr_Intk: + case X86::VFMSUB213SDZr_Intk: case X86::VFNMSUB213SDZr_Intk: + case X86::VFMSUB231SDZr_Intk: case X86::VFNMSUB231SDZr_Intk: + case X86::VFMADD132SDZr_Intkz: case X86::VFNMADD132SDZr_Intkz: + case X86::VFMADD213SDZr_Intkz: case X86::VFNMADD213SDZr_Intkz: + case 
X86::VFMADD231SDZr_Intkz: case X86::VFNMADD231SDZr_Intkz: + case X86::VFMSUB132SDZr_Intkz: case X86::VFNMSUB132SDZr_Intkz: + case X86::VFMSUB213SDZr_Intkz: case X86::VFNMSUB213SDZr_Intkz: + case X86::VFMSUB231SDZr_Intkz: case X86::VFNMSUB231SDZr_Intkz: return false; default: return true; @@ -7800,11 +8632,11 @@ bool X86InstrInfo::unfoldMemoryOperand( if (FoldedStore) MIB.addReg(Reg, RegState::Define); for (MachineOperand &BeforeOp : BeforeOps) - MIB.addOperand(BeforeOp); + MIB.add(BeforeOp); if (FoldedLoad) MIB.addReg(Reg); for (MachineOperand &AfterOp : AfterOps) - MIB.addOperand(AfterOp); + MIB.add(AfterOp); for (MachineOperand &ImpOp : ImpOps) { MIB.addReg(ImpOp.getReg(), getDefRegState(ImpOp.isDef()) | @@ -8143,28 +8975,29 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, break; } - // Check if chain operands and base addresses match. - if (Load1->getOperand(0) != Load2->getOperand(0) || - Load1->getOperand(5) != Load2->getOperand(5)) + // Lambda to check if both the loads have the same value for an operand index. + auto HasSameOp = [&](int I) { + return Load1->getOperand(I) == Load2->getOperand(I); + }; + + // All operands except the displacement should match. + if (!HasSameOp(X86::AddrBaseReg) || !HasSameOp(X86::AddrScaleAmt) || + !HasSameOp(X86::AddrIndexReg) || !HasSameOp(X86::AddrSegmentReg)) return false; - // Segment operands should match as well. - if (Load1->getOperand(4) != Load2->getOperand(4)) + + // Chain Operand must be the same. + if (!HasSameOp(5)) return false; - // Scale should be 1, Index should be Reg0. - if (Load1->getOperand(1) == Load2->getOperand(1) && - Load1->getOperand(2) == Load2->getOperand(2)) { - if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1) - return false; - // Now let's examine the displacements. - if (isa<ConstantSDNode>(Load1->getOperand(3)) && - isa<ConstantSDNode>(Load2->getOperand(3))) { - Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue(); - Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue(); - return true; - } - } - return false; + // Now let's examine if the displacements are constants. + auto Disp1 = dyn_cast<ConstantSDNode>(Load1->getOperand(X86::AddrDisp)); + auto Disp2 = dyn_cast<ConstantSDNode>(Load2->getOperand(X86::AddrDisp)); + if (!Disp1 || !Disp2) + return false; + + Offset1 = Disp1->getSExtValue(); + Offset2 = Disp2->getSExtValue(); + return true; } bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, @@ -8215,165 +9048,6 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, return true; } -bool X86InstrInfo::shouldScheduleAdjacent(const MachineInstr &First, - const MachineInstr &Second) const { - // Check if this processor supports macro-fusion. Since this is a minor - // heuristic, we haven't specifically reserved a feature. hasAVX is a decent - // proxy for SandyBridge+. 
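The shouldScheduleAdjacent hook being deleted here encoded a macro-fusion pairing rule (its removed body follows below): the Jcc opcode determines a fusion class, and the flag-producing instruction determines which classes it may fuse with. A compact standalone restatement, with illustrative enum and function names:

enum class FuseKind { Test, Cmp, Inc };          // derived from the Jcc opcode
enum class FlagSetter { TestOrAnd, CmpAddOrSub, IncOrDec, Other };

bool canMacroFuse(FlagSetter First, FuseKind Branch) {
  switch (First) {
  case FlagSetter::TestOrAnd:
    return true;                                 // fuses with every class
  case FlagSetter::CmpAddOrSub:
    return Branch == FuseKind::Cmp || Branch == FuseKind::Inc;
  case FlagSetter::IncOrDec:
    return Branch == FuseKind::Inc;
  case FlagSetter::Other:
    return false;
  }
  return false;
}

int main() {
  return canMacroFuse(FlagSetter::IncOrDec, FuseKind::Cmp) ? 1 : 0;
}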
- if (!Subtarget.hasAVX()) - return false; - - enum { - FuseTest, - FuseCmp, - FuseInc - } FuseKind; - - switch (Second.getOpcode()) { - default: - return false; - case X86::JE_1: - case X86::JNE_1: - case X86::JL_1: - case X86::JLE_1: - case X86::JG_1: - case X86::JGE_1: - FuseKind = FuseInc; - break; - case X86::JB_1: - case X86::JBE_1: - case X86::JA_1: - case X86::JAE_1: - FuseKind = FuseCmp; - break; - case X86::JS_1: - case X86::JNS_1: - case X86::JP_1: - case X86::JNP_1: - case X86::JO_1: - case X86::JNO_1: - FuseKind = FuseTest; - break; - } - switch (First.getOpcode()) { - default: - return false; - case X86::TEST8rr: - case X86::TEST16rr: - case X86::TEST32rr: - case X86::TEST64rr: - case X86::TEST8ri: - case X86::TEST16ri: - case X86::TEST32ri: - case X86::TEST32i32: - case X86::TEST64i32: - case X86::TEST64ri32: - case X86::TEST8rm: - case X86::TEST16rm: - case X86::TEST32rm: - case X86::TEST64rm: - case X86::TEST8ri_NOREX: - case X86::AND16i16: - case X86::AND16ri: - case X86::AND16ri8: - case X86::AND16rm: - case X86::AND16rr: - case X86::AND32i32: - case X86::AND32ri: - case X86::AND32ri8: - case X86::AND32rm: - case X86::AND32rr: - case X86::AND64i32: - case X86::AND64ri32: - case X86::AND64ri8: - case X86::AND64rm: - case X86::AND64rr: - case X86::AND8i8: - case X86::AND8ri: - case X86::AND8rm: - case X86::AND8rr: - return true; - case X86::CMP16i16: - case X86::CMP16ri: - case X86::CMP16ri8: - case X86::CMP16rm: - case X86::CMP16rr: - case X86::CMP32i32: - case X86::CMP32ri: - case X86::CMP32ri8: - case X86::CMP32rm: - case X86::CMP32rr: - case X86::CMP64i32: - case X86::CMP64ri32: - case X86::CMP64ri8: - case X86::CMP64rm: - case X86::CMP64rr: - case X86::CMP8i8: - case X86::CMP8ri: - case X86::CMP8rm: - case X86::CMP8rr: - case X86::ADD16i16: - case X86::ADD16ri: - case X86::ADD16ri8: - case X86::ADD16ri8_DB: - case X86::ADD16ri_DB: - case X86::ADD16rm: - case X86::ADD16rr: - case X86::ADD16rr_DB: - case X86::ADD32i32: - case X86::ADD32ri: - case X86::ADD32ri8: - case X86::ADD32ri8_DB: - case X86::ADD32ri_DB: - case X86::ADD32rm: - case X86::ADD32rr: - case X86::ADD32rr_DB: - case X86::ADD64i32: - case X86::ADD64ri32: - case X86::ADD64ri32_DB: - case X86::ADD64ri8: - case X86::ADD64ri8_DB: - case X86::ADD64rm: - case X86::ADD64rr: - case X86::ADD64rr_DB: - case X86::ADD8i8: - case X86::ADD8mi: - case X86::ADD8mr: - case X86::ADD8ri: - case X86::ADD8rm: - case X86::ADD8rr: - case X86::SUB16i16: - case X86::SUB16ri: - case X86::SUB16ri8: - case X86::SUB16rm: - case X86::SUB16rr: - case X86::SUB32i32: - case X86::SUB32ri: - case X86::SUB32ri8: - case X86::SUB32rm: - case X86::SUB32rr: - case X86::SUB64i32: - case X86::SUB64ri32: - case X86::SUB64ri8: - case X86::SUB64rm: - case X86::SUB64rr: - case X86::SUB8i8: - case X86::SUB8ri: - case X86::SUB8rm: - case X86::SUB8rr: - return FuseKind == FuseCmp || FuseKind == FuseInc; - case X86::INC16r: - case X86::INC32r: - case X86::INC64r: - case X86::INC8r: - case X86::DEC16r: - case X86::DEC32r: - case X86::DEC64r: - case X86::DEC8r: - return FuseKind == FuseInc; - } -} - bool X86InstrInfo:: reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert(Cond.size() == 1 && "Invalid X86 branch condition!"); @@ -8424,6 +9098,7 @@ static const uint16_t ReplaceableInstrs[][3] = { { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr }, { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm }, { X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr }, + { X86::MOVSDmr, X86::MOVSDmr, X86::MOVPQI2QImr }, { X86::MOVSSmr, X86::MOVSSmr, 
X86::MOVPDI2DImr }, { X86::MOVSDrm, X86::MOVSDrm, X86::MOVQI2PQIrm }, { X86::MOVSSrm, X86::MOVSSrm, X86::MOVDI2PDIrm }, @@ -8443,6 +9118,7 @@ static const uint16_t ReplaceableInstrs[][3] = { { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr }, { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm }, { X86::VMOVLPSmr, X86::VMOVLPDmr, X86::VMOVPQI2QImr }, + { X86::VMOVSDmr, X86::VMOVSDmr, X86::VMOVPQI2QImr }, { X86::VMOVSSmr, X86::VMOVSSmr, X86::VMOVPDI2DImr }, { X86::VMOVSDrm, X86::VMOVSDrm, X86::VMOVQI2PQIrm }, { X86::VMOVSSrm, X86::VMOVSSrm, X86::VMOVDI2PDIrm }, @@ -8465,7 +9141,7 @@ static const uint16_t ReplaceableInstrs[][3] = { // AVX512 support { X86::VMOVLPSZ128mr, X86::VMOVLPDZ128mr, X86::VMOVPQI2QIZmr }, { X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr }, - { X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr }, + { X86::VMOVNTPSZ256mr, X86::VMOVNTPDZ256mr, X86::VMOVNTDQZ256mr }, { X86::VMOVNTPSZmr, X86::VMOVNTPDZmr, X86::VMOVNTDQZmr }, { X86::VMOVSDZmr, X86::VMOVSDZmr, X86::VMOVPQI2QIZmr }, { X86::VMOVSSZmr, X86::VMOVSSZmr, X86::VMOVPDI2DIZmr }, @@ -8493,10 +9169,6 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = { { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr }, { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm }, { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr }, - { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr }, - { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr }, - { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm }, - { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr }, { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm }, { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr }, { X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm}, @@ -8508,6 +9180,14 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = { { X86::VBROADCASTF128, X86::VBROADCASTF128, X86::VBROADCASTI128 }, }; +static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = { + //PackedSingle PackedDouble PackedInt + { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr }, + { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr }, + { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm }, + { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr }, +}; + static const uint16_t ReplaceableInstrsAVX512[][4] = { // Two integer columns for 64-bit and 32-bit elements. //PackedSingle PackedDouble PackedInt PackedInt @@ -8769,16 +9449,25 @@ X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const { validDomains = 0xe; } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) { validDomains = Subtarget.hasAVX2() ? 0xe : 0x6; + } else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) { + // Insert/extract instructions should only effect domain if AVX2 + // is enabled. + if (!Subtarget.hasAVX2()) + return std::make_pair(0, 0); + validDomains = 0xe; } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512)) { validDomains = 0xe; - } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512DQ)) { - validDomains = Subtarget.hasDQI() ? 0xe : 0x8; - } else if (const uint16_t *table = lookupAVX512(opcode, domain, + } else if (Subtarget.hasDQI() && lookupAVX512(opcode, domain, + ReplaceableInstrsAVX512DQ)) { + validDomains = 0xe; + } else if (Subtarget.hasDQI()) { + if (const uint16_t *table = lookupAVX512(opcode, domain, ReplaceableInstrsAVX512DQMasked)) { - if (domain == 1 || (domain == 3 && table[3] == opcode)) - validDomains = Subtarget.hasDQI() ? 
0xa : 0x8; - else - validDomains = Subtarget.hasDQI() ? 0xc : 0x8; + if (domain == 1 || (domain == 3 && table[3] == opcode)) + validDomains = 0xa; + else + validDomains = 0xc; + } } } return std::make_pair(domain, validDomains); @@ -8794,6 +9483,11 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const { "256-bit vector operations only available in AVX2"); table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2); } + if (!table) { // try the other table + assert(Subtarget.hasAVX2() && + "256-bit insert/extract only available in AVX2"); + table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2InsertExtract); + } if (!table) { // try the AVX512 table assert(Subtarget.hasAVX512() && "Requires AVX-512"); table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512); @@ -9457,28 +10151,6 @@ X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const { return makeArrayRef(TargetFlags); } -bool X86InstrInfo::isTailCall(const MachineInstr &Inst) const { - switch (Inst.getOpcode()) { - case X86::TCRETURNdi: - case X86::TCRETURNmi: - case X86::TCRETURNri: - case X86::TCRETURNdi64: - case X86::TCRETURNmi64: - case X86::TCRETURNri64: - case X86::TAILJMPd: - case X86::TAILJMPm: - case X86::TAILJMPr: - case X86::TAILJMPd64: - case X86::TAILJMPm64: - case X86::TAILJMPr64: - case X86::TAILJMPm64_REX: - case X86::TAILJMPr64_REX: - return true; - default: - return false; - } -} - namespace { /// Create Global Base Reg pass. This initializes the PIC /// global base register for x86-32. @@ -9665,3 +10337,124 @@ namespace { char LDTLSCleanup::ID = 0; FunctionPass* llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } + +unsigned X86InstrInfo::getOutliningBenefit(size_t SequenceSize, + size_t Occurrences, + bool CanBeTailCall) const { + unsigned NotOutlinedSize = SequenceSize * Occurrences; + unsigned OutlinedSize; + + // Is it a tail call? + if (CanBeTailCall) { + // If yes, we don't have to include a return instruction-- it's already in + // our sequence. So we have one occurrence of the sequence + #Occurrences + // calls. + OutlinedSize = SequenceSize + Occurrences; + } else { + // If not, add one for the return instruction. + OutlinedSize = (SequenceSize + 1) + Occurrences; + } + + // Return the number of instructions saved by outlining this sequence. + return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0; +} + +bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const { + return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); +} + +X86GenInstrInfo::MachineOutlinerInstrType +X86InstrInfo::getOutliningType(MachineInstr &MI) const { + + // Don't allow debug values to impact outlining type. + if (MI.isDebugValue() || MI.isIndirectDebugValue()) + return MachineOutlinerInstrType::Invisible; + + // Is this a tail call? If yes, we can outline as a tail call. + if (isTailCall(MI)) + return MachineOutlinerInstrType::Legal; + + // Is this the terminator of a basic block? + if (MI.isTerminator() || MI.isReturn()) { + + // Does its parent have any successors in its MachineFunction? + if (MI.getParent()->succ_empty()) + return MachineOutlinerInstrType::Legal; + + // It does, so we can't tail call it. + return MachineOutlinerInstrType::Illegal; + } + + // Don't outline anything that modifies or reads from the stack pointer. + // + // FIXME: There are instructions which are being manually built without + // explicit uses/defs so we also have to check the MCInstrDesc. 
We should be + // able to remove the extra checks once those are fixed up. For example, + // sometimes we might get something like %RAX<def> = POP64r 1. This won't be + // caught by modifiesRegister or readsRegister even though the instruction + // really ought to be formed so that modifiesRegister/readsRegister would + // catch it. + if (MI.modifiesRegister(X86::RSP, &RI) || MI.readsRegister(X86::RSP, &RI) || + MI.getDesc().hasImplicitUseOfPhysReg(X86::RSP) || + MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP)) + return MachineOutlinerInstrType::Illegal; + + // Outlined calls change the instruction pointer, so don't read from it. + if (MI.readsRegister(X86::RIP, &RI) || + MI.getDesc().hasImplicitUseOfPhysReg(X86::RIP) || + MI.getDesc().hasImplicitDefOfPhysReg(X86::RIP)) + return MachineOutlinerInstrType::Illegal; + + // Positions can't safely be outlined. + if (MI.isPosition()) + return MachineOutlinerInstrType::Illegal; + + // Make sure none of the operands of this instruction do anything tricky. + for (const MachineOperand &MOP : MI.operands()) + if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || + MOP.isTargetIndex()) + return MachineOutlinerInstrType::Illegal; + + return MachineOutlinerInstrType::Legal; +} + +void X86InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB, + MachineFunction &MF, + bool IsTailCall) const { + + // If we're a tail call, we already have a return, so don't do anything. + if (IsTailCall) + return; + + // We're a normal call, so our sequence doesn't have a return instruction. + // Add it in. + MachineInstr *retq = BuildMI(MF, DebugLoc(), get(X86::RETQ)); + MBB.insert(MBB.end(), retq); +} + +void X86InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB, + MachineFunction &MF, + bool IsTailCall) const { + return; +} + +MachineBasicBlock::iterator +X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB, + MachineBasicBlock::iterator &It, + MachineFunction &MF, + bool IsTailCall) const { + // Is it a tail call? + if (IsTailCall) { + // Yes, just insert a JMP. + It = MBB.insert(It, + BuildMI(MF, DebugLoc(), get(X86::JMP_1)) + .addGlobalAddress(M.getNamedValue(MF.getName()))); + } else { + // No, insert a call. + It = MBB.insert(It, + BuildMI(MF, DebugLoc(), get(X86::CALL64pcrel32)) + .addGlobalAddress(M.getNamedValue(MF.getName()))); + } + + return It; +} |
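The getOutliningBenefit hook added above uses a simple instruction-count model. A standalone sketch of the same arithmetic follows, with a worked example; the numbers in main are illustrative.

#include <cstdio>

unsigned outliningBenefit(unsigned SequenceSize, unsigned Occurrences,
                          bool CanBeTailCall) {
  unsigned NotOutlined = SequenceSize * Occurrences;
  // Outlined cost: one copy of the sequence (plus a RET when it is not
  // already a tail call) and one call per occurrence.
  unsigned Outlined =
      (CanBeTailCall ? SequenceSize : SequenceSize + 1) + Occurrences;
  return NotOutlined > Outlined ? NotOutlined - Outlined : 0;
}

int main() {
  // A 10-instruction sequence repeated 3 times: 30 instructions inline versus
  // 10 + 1 (ret) + 3 (calls) = 14 outlined, so 16 instructions are saved.
  std::printf("%u\n", outliningBenefit(10, 3, /*CanBeTailCall=*/false));
}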