Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC')
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 165
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp | 211
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 37
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 194
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h | 109
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 60
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/P9InstrResources.td | 2
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPC.td | 18
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 56
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td | 2
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp | 42
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 139
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp | 9
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 491
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 1041
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h | 39
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 174
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 14
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td | 21
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td | 4
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 559
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h | 82
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td | 380
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td | 50
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td | 10
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td | 316
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td | 19
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp | 95
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 13
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 19
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td | 8
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCSchedule.td | 5
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td | 11
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td | 6
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td | 8
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td | 8
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td | 10
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td | 3
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td | 3
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td | 5
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td | 5
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td | 12
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td | 12
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td | 6
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 24
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 9
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 4
49 files changed, 2941 insertions(+), 1578 deletions(-)
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 56307a84f2e5..8b3480f772e9 100644
--- a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -21,7 +21,6 @@
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
@@ -31,169 +30,7 @@
using namespace llvm;
-static const MCPhysReg RRegs[32] = {
- PPC::R0, PPC::R1, PPC::R2, PPC::R3,
- PPC::R4, PPC::R5, PPC::R6, PPC::R7,
- PPC::R8, PPC::R9, PPC::R10, PPC::R11,
- PPC::R12, PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31
-};
-static const MCPhysReg RRegsNoR0[32] = {
- PPC::ZERO,
- PPC::R1, PPC::R2, PPC::R3,
- PPC::R4, PPC::R5, PPC::R6, PPC::R7,
- PPC::R8, PPC::R9, PPC::R10, PPC::R11,
- PPC::R12, PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31
-};
-static const MCPhysReg XRegs[32] = {
- PPC::X0, PPC::X1, PPC::X2, PPC::X3,
- PPC::X4, PPC::X5, PPC::X6, PPC::X7,
- PPC::X8, PPC::X9, PPC::X10, PPC::X11,
- PPC::X12, PPC::X13, PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31
-};
-static const MCPhysReg XRegsNoX0[32] = {
- PPC::ZERO8,
- PPC::X1, PPC::X2, PPC::X3,
- PPC::X4, PPC::X5, PPC::X6, PPC::X7,
- PPC::X8, PPC::X9, PPC::X10, PPC::X11,
- PPC::X12, PPC::X13, PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31
-};
-static const MCPhysReg FRegs[32] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31
-};
-static const MCPhysReg SPERegs[32] = {
- PPC::S0, PPC::S1, PPC::S2, PPC::S3,
- PPC::S4, PPC::S5, PPC::S6, PPC::S7,
- PPC::S8, PPC::S9, PPC::S10, PPC::S11,
- PPC::S12, PPC::S13, PPC::S14, PPC::S15,
- PPC::S16, PPC::S17, PPC::S18, PPC::S19,
- PPC::S20, PPC::S21, PPC::S22, PPC::S23,
- PPC::S24, PPC::S25, PPC::S26, PPC::S27,
- PPC::S28, PPC::S29, PPC::S30, PPC::S31
-};
-static const MCPhysReg VFRegs[32] = {
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-static const MCPhysReg VRegs[32] = {
- PPC::V0, PPC::V1, PPC::V2, PPC::V3,
- PPC::V4, PPC::V5, PPC::V6, PPC::V7,
- PPC::V8, PPC::V9, PPC::V10, PPC::V11,
- PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-static const MCPhysReg VSRegs[64] = {
- PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
- PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
- PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
- PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
- PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
- PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
- PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
- PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
-
- PPC::V0, PPC::V1, PPC::V2, PPC::V3,
- PPC::V4, PPC::V5, PPC::V6, PPC::V7,
- PPC::V8, PPC::V9, PPC::V10, PPC::V11,
- PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-static const MCPhysReg VSFRegs[64] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31,
-
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-static const MCPhysReg VSSRegs[64] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31,
-
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-static unsigned QFRegs[32] = {
- PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
- PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
- PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
- PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
- PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
- PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
- PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
- PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
-};
-static const MCPhysReg CRBITRegs[32] = {
- PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
- PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
- PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN,
- PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
- PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
-};
-static const MCPhysReg CRRegs[8] = {
- PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
- PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
-};
+DEFINE_PPC_REGCLASSES;
// Evaluate an expression containing condition register
// or condition register field symbols. Returns positive
diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index db01271b87e1..26869f250823 100644
--- a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
@@ -17,6 +17,8 @@
using namespace llvm;
+DEFINE_PPC_REGCLASSES;
+
#define DEBUG_TYPE "ppc-disassembler"
typedef MCDisassembler::DecodeStatus DecodeStatus;
@@ -62,184 +64,9 @@ extern "C" void LLVMInitializePowerPCDisassembler() {
// FIXME: These can be generated by TableGen from the existing register
// encoding values!
-static const unsigned CRRegs[] = {
- PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
- PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
-};
-
-static const unsigned CRBITRegs[] = {
- PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
- PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
- PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN,
- PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
- PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
-};
-
-static const unsigned FRegs[] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31
-};
-
-static const unsigned VFRegs[] = {
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-
-static const unsigned VRegs[] = {
- PPC::V0, PPC::V1, PPC::V2, PPC::V3,
- PPC::V4, PPC::V5, PPC::V6, PPC::V7,
- PPC::V8, PPC::V9, PPC::V10, PPC::V11,
- PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-
-static const unsigned VSRegs[] = {
- PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
- PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
- PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
- PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
- PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
- PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
- PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
- PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
-
- PPC::V0, PPC::V1, PPC::V2, PPC::V3,
- PPC::V4, PPC::V5, PPC::V6, PPC::V7,
- PPC::V8, PPC::V9, PPC::V10, PPC::V11,
- PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-
-static const unsigned VSFRegs[] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31,
-
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-
-static const unsigned VSSRegs[] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31,
-
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-
-static const unsigned GPRegs[] = {
- PPC::R0, PPC::R1, PPC::R2, PPC::R3,
- PPC::R4, PPC::R5, PPC::R6, PPC::R7,
- PPC::R8, PPC::R9, PPC::R10, PPC::R11,
- PPC::R12, PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31
-};
-
-static const unsigned GP0Regs[] = {
- PPC::ZERO, PPC::R1, PPC::R2, PPC::R3,
- PPC::R4, PPC::R5, PPC::R6, PPC::R7,
- PPC::R8, PPC::R9, PPC::R10, PPC::R11,
- PPC::R12, PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31
-};
-
-static const unsigned G8Regs[] = {
- PPC::X0, PPC::X1, PPC::X2, PPC::X3,
- PPC::X4, PPC::X5, PPC::X6, PPC::X7,
- PPC::X8, PPC::X9, PPC::X10, PPC::X11,
- PPC::X12, PPC::X13, PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31
-};
-
-static const unsigned G80Regs[] = {
- PPC::ZERO8, PPC::X1, PPC::X2, PPC::X3,
- PPC::X4, PPC::X5, PPC::X6, PPC::X7,
- PPC::X8, PPC::X9, PPC::X10, PPC::X11,
- PPC::X12, PPC::X13, PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31
-};
-
-static const unsigned QFRegs[] = {
- PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
- PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
- PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
- PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
- PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
- PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
- PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
- PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
-};
-
-static const unsigned SPERegs[] = {
- PPC::S0, PPC::S1, PPC::S2, PPC::S3,
- PPC::S4, PPC::S5, PPC::S6, PPC::S7,
- PPC::S8, PPC::S9, PPC::S10, PPC::S11,
- PPC::S12, PPC::S13, PPC::S14, PPC::S15,
- PPC::S16, PPC::S17, PPC::S18, PPC::S19,
- PPC::S20, PPC::S21, PPC::S22, PPC::S23,
- PPC::S24, PPC::S25, PPC::S26, PPC::S27,
- PPC::S28, PPC::S29, PPC::S30, PPC::S31
-};
-
template <std::size_t N>
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
- const unsigned (&Regs)[N]) {
+ const MCPhysReg (&Regs)[N]) {
assert(RegNo < N && "Invalid register number");
Inst.addOperand(MCOperand::createReg(Regs[RegNo]));
return MCDisassembler::Success;
@@ -308,25 +135,25 @@ static DecodeStatus DecodeVSSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, GPRegs);
+ return decodeRegisterClass(Inst, RegNo, RRegs);
}
static DecodeStatus DecodeGPRC_NOR0RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, GP0Regs);
+ return decodeRegisterClass(Inst, RegNo, RRegsNoR0);
}
static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, G8Regs);
+ return decodeRegisterClass(Inst, RegNo, XRegs);
}
static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, G80Regs);
+ return decodeRegisterClass(Inst, RegNo, XRegsNoX0);
}
#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
@@ -341,7 +168,7 @@ static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeSPE4RCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, GPRegs);
+ return decodeRegisterClass(Inst, RegNo, RRegs);
}
static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo,
@@ -388,19 +215,19 @@ static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm,
case PPC::LFSU:
case PPC::LFDU:
// Add the tied output operand.
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
break;
case PPC::STBU:
case PPC::STHU:
case PPC::STWU:
case PPC::STFSU:
case PPC::STFDU:
- Inst.insert(Inst.begin(), MCOperand::createReg(GP0Regs[Base]));
+ Inst.insert(Inst.begin(), MCOperand::createReg(RRegsNoR0[Base]));
break;
}
Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp)));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -416,12 +243,12 @@ static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm,
if (Inst.getOpcode() == PPC::LDU)
// Add the tied output operand.
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
else if (Inst.getOpcode() == PPC::STDU)
- Inst.insert(Inst.begin(), MCOperand::createReg(GP0Regs[Base]));
+ Inst.insert(Inst.begin(), MCOperand::createReg(RRegsNoR0[Base]));
Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 2)));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -436,7 +263,7 @@ static DecodeStatus decodeMemRIX16Operands(MCInst &Inst, uint64_t Imm,
assert(Base < 32 && "Invalid base register");
Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 4)));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -451,7 +278,7 @@ static DecodeStatus decodeSPE8Operands(MCInst &Inst, uint64_t Imm,
assert(Base < 32 && "Invalid base register");
Inst.addOperand(MCOperand::createImm(Disp << 3));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -466,7 +293,7 @@ static DecodeStatus decodeSPE4Operands(MCInst &Inst, uint64_t Imm,
assert(Base < 32 && "Invalid base register");
Inst.addOperand(MCOperand::createImm(Disp << 2));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -481,7 +308,7 @@ static DecodeStatus decodeSPE2Operands(MCInst &Inst, uint64_t Imm,
assert(Base < 32 && "Invalid base register");
Inst.addOperand(MCOperand::createImm(Disp << 1));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
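
Worth noting about the template change above: decodeRegisterClass now takes the table by reference-to-array of MCPhysReg, so the element count N is deduced at compile time and each decoder gets a range assert sized to its own table. A minimal standalone sketch of the pattern (hypothetical demo code, not part of the patch):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    typedef uint16_t MCPhysReg; // stand-in for llvm::MCPhysReg

    template <std::size_t N>
    static bool decodeRegisterClass(uint64_t RegNo, const MCPhysReg (&Regs)[N]) {
      (void)Regs;
      assert(RegNo < N && "Invalid register number"); // N deduced from the array
      return true;
    }

    static const MCPhysReg Demo32[32] = {}; // shaped like RRegs
    static const MCPhysReg Demo64[64] = {}; // shaped like VSRegs

    static const bool OK32 = decodeRegisterClass(31, Demo32); // N == 32
    static const bool OK64 = decodeRegisterClass(63, Demo64); // N == 64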
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index fd7f81591426..fc29e4effbb1 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -499,43 +499,14 @@ bool PPCInstPrinter::showRegistersWithPrefix() const {
return TT.isOSDarwin() || FullRegNamesWithPercent || FullRegNames;
}
-/// stripRegisterPrefix - This method strips the character prefix from a
-/// register name so that only the number is left.
-static const char *stripRegisterPrefix(const char *RegName) {
- switch (RegName[0]) {
- case 'r':
- case 'f':
- case 'q': // for QPX
- case 'v':
- if (RegName[1] == 's')
- return RegName + 2;
- return RegName + 1;
- case 'c': if (RegName[1] == 'r') return RegName + 2;
- }
-
- return RegName;
-}
-
void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
unsigned Reg = Op.getReg();
-
- // There are VSX instructions that use VSX register numbering (vs0 - vs63)
- // as well as those that use VMX register numbering (v0 - v31 which
- // correspond to vs32 - vs63). If we have an instruction that uses VSX
- // numbering, we need to convert the VMX registers to VSX registers.
- // Namely, we print 32-63 when the instruction operates on one of the
- // VMX registers.
- // (Please synchronize with PPCAsmPrinter::printOperand)
- if ((MII.get(MI->getOpcode()).TSFlags & PPCII::UseVSXReg) &&
- !ShowVSRNumsAsVR) {
- if (PPCInstrInfo::isVRRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::V0);
- else if (PPCInstrInfo::isVFRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::VF0);
- }
+ if (!ShowVSRNumsAsVR)
+ Reg = PPCInstrInfo::getRegNumForOperand(MII.get(MI->getOpcode()),
+ Reg, OpNo);
const char *RegName;
RegName = getVerboseConditionRegName(Reg, MRI.getEncodingValue(Reg));
@@ -544,7 +515,7 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (showRegistersWithPercentPrefix(RegName))
O << "%";
if (!showRegistersWithPrefix())
- RegName = stripRegisterPrefix(RegName);
+ RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
O << RegName;
return;
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 57bda1403c62..8c15ade6f9c4 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -13,18 +13,13 @@
#include "MCTargetDesc/PPCFixupKinds.h"
#include "PPCInstrInfo.h"
+#include "PPCMCCodeEmitter.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"
-#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
@@ -39,117 +34,6 @@ using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
-namespace {
-
-class PPCMCCodeEmitter : public MCCodeEmitter {
- const MCInstrInfo &MCII;
- const MCContext &CTX;
- bool IsLittleEndian;
-
-public:
- PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
- : MCII(mcii), CTX(ctx),
- IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
- PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete;
- void operator=(const PPCMCCodeEmitter &) = delete;
- ~PPCMCCodeEmitter() override = default;
-
- unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- /// getMachineOpValue - Return binary encoding of operand. If the machine
- /// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- // getBinaryCodeForInstr - TableGen'erated function for getting the
- // binary encoding for an instruction.
- uint64_t getBinaryCodeForInstr(const MCInst &MI,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override {
- verifyInstructionPredicates(MI,
- computeAvailableFeatures(STI.getFeatureBits()));
-
- unsigned Opcode = MI.getOpcode();
- const MCInstrDesc &Desc = MCII.get(Opcode);
-
- uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
-
- // Output the constant in big/little endian byte order.
- unsigned Size = Desc.getSize();
- support::endianness E = IsLittleEndian ? support::little : support::big;
- switch (Size) {
- case 0:
- break;
- case 4:
- support::endian::write<uint32_t>(OS, Bits, E);
- break;
- case 8:
- // If we emit a pair of instructions, the first one is
- // always in the top 32 bits, even on little-endian.
- support::endian::write<uint32_t>(OS, Bits >> 32, E);
- support::endian::write<uint32_t>(OS, Bits, E);
- break;
- default:
- llvm_unreachable("Invalid instruction size");
- }
-
- ++MCNumEmitted; // Keep track of the # of mi's emitted.
- }
-
-private:
- uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
- void verifyInstructionPredicates(const MCInst &MI,
- uint64_t AvailableFeatures) const;
-};
-
-} // end anonymous namespace
-
MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx) {
@@ -264,10 +148,16 @@ unsigned PPCMCCodeEmitter::getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 12;
const MCOperand &MO = MI.getOperand(OpNo);
- assert(MO.isImm() && !(MO.getImm() % 16) &&
- "Expecting an immediate that is a multiple of 16");
+ if (MO.isImm()) {
+ assert(!(MO.getImm() % 16) &&
+ "Expecting an immediate that is a multiple of 16");
+ return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits;
+ }
- return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits;
+ // Otherwise add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_half16ds));
+ return RegBits;
}
unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
@@ -354,6 +244,20 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
return 0x80 >> CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
}
+// Get the index for this operand in this instruction. This is needed for
+// computing the register number in PPCInstrInfo::getRegNumForOperand() for
+// any instructions that use a different numbering scheme for registers in
+// different operands.
+static unsigned getOpIdxForMO(const MCInst &MI, const MCOperand &MO) {
+ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
+ const MCOperand &Op = MI.getOperand(i);
+ if (&Op == &MO)
+ return i;
+ }
+ llvm_unreachable("This operand is not part of this instruction");
+ return ~0U; // Silence any warnings about no return.
+}
+
unsigned PPCMCCodeEmitter::
getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
@@ -364,14 +268,11 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 &&
MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- unsigned Reg = MO.getReg();
- unsigned Encode = CTX.getRegisterInfo()->getEncodingValue(Reg);
-
- if ((MCII.get(MI.getOpcode()).TSFlags & PPCII::UseVSXReg))
- if (PPCInstrInfo::isVRRegister(Reg))
- Encode += 32;
-
- return Encode;
+ unsigned OpNo = getOpIdxForMO(MI, MO);
+ unsigned Reg =
+ PPCInstrInfo::getRegNumForOperand(MCII.get(MI.getOpcode()),
+ MO.getReg(), OpNo);
+ return CTX.getRegisterInfo()->getEncodingValue(Reg);
}
assert(MO.isImm() &&
@@ -379,5 +280,42 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
return MO.getImm();
}
+void PPCMCCodeEmitter::encodeInstruction(
+ const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ verifyInstructionPredicates(MI,
+ computeAvailableFeatures(STI.getFeatureBits()));
+
+ uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
+
+ // Output the constant in big/little endian byte order.
+ unsigned Size = getInstSizeInBytes(MI);
+ support::endianness E = IsLittleEndian ? support::little : support::big;
+ switch (Size) {
+ case 0:
+ break;
+ case 4:
+ support::endian::write<uint32_t>(OS, Bits, E);
+ break;
+ case 8:
+ // If we emit a pair of instructions, the first one is
+ // always in the top 32 bits, even on little-endian.
+ support::endian::write<uint32_t>(OS, Bits >> 32, E);
+ support::endian::write<uint32_t>(OS, Bits, E);
+ break;
+ default:
+ llvm_unreachable("Invalid instruction size");
+ }
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+}
+
+// Get the number of bytes used to encode the given MCInst.
+unsigned PPCMCCodeEmitter::getInstSizeInBytes(const MCInst &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
+ return Desc.getSize();
+}
+
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "PPCGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
new file mode 100644
index 000000000000..a4bcff4b9450
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
@@ -0,0 +1,109 @@
+//===-- PPCMCCodeEmitter.h - Convert PPC code to machine code -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_PPC_MCCODEEMITTER_PPCCODEEMITTER_H
+#define LLVM_LIB_TARGET_PPC_MCCODEEMITTER_PPCCODEEMITTER_H
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+
+namespace llvm {
+
+class PPCMCCodeEmitter : public MCCodeEmitter {
+ const MCInstrInfo &MCII;
+ const MCContext &CTX;
+ bool IsLittleEndian;
+
+public:
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), CTX(ctx),
+ IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
+ PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete;
+ void operator=(const PPCMCCodeEmitter &) = delete;
+ ~PPCMCCodeEmitter() override = default;
+
+ unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+ // Get the number of bytes used to encode the given MCInst.
+ unsigned getInstSizeInBytes(const MCInst &MI) const;
+
+private:
+ uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
+ void verifyInstructionPredicates(const MCInst &MI,
+ uint64_t AvailableFeatures) const;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_PPC_MCCODEEMITTER_PPCCODEEMITTER_H
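
Hoisting the class into its own header makes the emitter visible outside PPCMCCodeEmitter.cpp; in particular, the new getInstSizeInBytes() hook lets clients size an instruction without encoding it. A hedged usage sketch (hypothetical; MCII, Ctx, and Inst are assumed to exist, and in practice the emitter is obtained through createPPCMCCodeEmitter rather than constructed directly):

    #include "MCTargetDesc/PPCMCCodeEmitter.h"
    #include <cassert>

    PPCMCCodeEmitter Emitter(MCII, Ctx);
    unsigned Size = Emitter.getInstSizeInBytes(Inst);
    assert((Size == 0 || Size == 4 || Size == 8) &&
           "PPC instructions encode to 0, 4, or 8 bytes");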
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 316fd2ccf358..d6e450cba0d7 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -17,6 +17,7 @@
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>
#include <memory>
@@ -104,4 +105,63 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
#define GET_SUBTARGETINFO_ENUM
#include "PPCGenSubtargetInfo.inc"
+#define PPC_REGS0_31(X) \
+ { \
+ X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \
+ X##12, X##13, X##14, X##15, X##16, X##17, X##18, X##19, X##20, X##21, \
+ X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31 \
+ }
+
+#define PPC_REGS_NO0_31(Z, X) \
+ { \
+ Z, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \
+ X##12, X##13, X##14, X##15, X##16, X##17, X##18, X##19, X##20, X##21, \
+ X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31 \
+ }
+
+#define PPC_REGS_LO_HI(LO, HI) \
+ { \
+ LO##0, LO##1, LO##2, LO##3, LO##4, LO##5, LO##6, LO##7, LO##8, LO##9, \
+ LO##10, LO##11, LO##12, LO##13, LO##14, LO##15, LO##16, LO##17, \
+ LO##18, LO##19, LO##20, LO##21, LO##22, LO##23, LO##24, LO##25, \
+ LO##26, LO##27, LO##28, LO##29, LO##30, LO##31, HI##0, HI##1, HI##2, \
+ HI##3, HI##4, HI##5, HI##6, HI##7, HI##8, HI##9, HI##10, HI##11, \
+ HI##12, HI##13, HI##14, HI##15, HI##16, HI##17, HI##18, HI##19, \
+ HI##20, HI##21, HI##22, HI##23, HI##24, HI##25, HI##26, HI##27, \
+ HI##28, HI##29, HI##30, HI##31 \
+ }
+
+using llvm::MCPhysReg;
+
+#define DEFINE_PPC_REGCLASSES \
+ static const MCPhysReg RRegs[32] = PPC_REGS0_31(PPC::R); \
+ static const MCPhysReg XRegs[32] = PPC_REGS0_31(PPC::X); \
+ static const MCPhysReg FRegs[32] = PPC_REGS0_31(PPC::F); \
+ static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \
+ static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \
+ static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \
+ static const MCPhysReg QFRegs[32] = PPC_REGS0_31(PPC::QF); \
+ static const MCPhysReg RRegsNoR0[32] = \
+ PPC_REGS_NO0_31(PPC::ZERO, PPC::R); \
+ static const MCPhysReg XRegsNoX0[32] = \
+ PPC_REGS_NO0_31(PPC::ZERO8, PPC::X); \
+ static const MCPhysReg VSRegs[64] = \
+ PPC_REGS_LO_HI(PPC::VSL, PPC::V); \
+ static const MCPhysReg VSFRegs[64] = \
+ PPC_REGS_LO_HI(PPC::F, PPC::VF); \
+ static const MCPhysReg VSSRegs[64] = \
+ PPC_REGS_LO_HI(PPC::F, PPC::VF); \
+ static const MCPhysReg CRBITRegs[32] = { \
+ PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, \
+ PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, \
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, \
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, \
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, \
+ PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, \
+ PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, \
+ PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN}; \
+ static const MCPhysReg CRRegs[8] = { \
+ PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, \
+ PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7}
+
#endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
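
These macros are plain token pasting, so the generated tables are equivalent to the hand-written ones deleted from the parser and disassembler. For example, the RRegsNoR0 definition expands to (abridged in the middle):

    static const MCPhysReg RRegsNoR0[32] = {
        PPC::ZERO, PPC::R1, PPC::R2, PPC::R3,
        /* ... PPC::R4 through PPC::R29 ... */
        PPC::R30, PPC::R31};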
diff --git a/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td b/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td
index c6cbb9037ede..17c37964c562 100644
--- a/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -111,11 +111,11 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
(instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
(instregex "POPCNT(D|W)$"),
(instregex "CMPB(8)?$"),
+ (instregex "SETB(8)?$"),
XSTDIVDP,
XSTSQRTDP,
XSXSIGDP,
XSCVSPDPN,
- SETB,
BPERMD
)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
index 80ad4962a20f..98e6e98e6974 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -305,11 +305,11 @@ def : Processor<"generic", G3Itineraries, [Directive32, FeatureHardFloat,
FeatureMFTB]>;
def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureICBT, FeatureBookE,
+ FeatureICBT, FeatureBookE,
FeatureMSYNC, FeatureMFTB]>;
def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureICBT, FeatureBookE,
+ FeatureICBT, FeatureBookE,
FeatureMSYNC, FeatureMFTB]>;
def : Processor<"601", G3Itineraries, [Directive601, FeatureFPU]>;
def : Processor<"602", G3Itineraries, [Directive602, FeatureFPU,
@@ -348,7 +348,7 @@ def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE,
FeatureMFTB]>;
def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
- FeatureFRES, FeatureFRSQRTE,
+ FeatureFRES, FeatureFRSQRTE,
FeatureMFTB]>;
def : ProcessorModel<"970", G5Model,
@@ -369,11 +369,11 @@ def : ProcessorModel<"e500", PPCE500Model,
FeatureISEL, FeatureMFTB]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
[DirectiveE500mc,
- FeatureSTFIWX, FeatureICBT, FeatureBookE,
+ FeatureSTFIWX, FeatureICBT, FeatureBookE,
FeatureISEL, FeatureMFTB]>;
def : ProcessorModel<"e5500", PPCE5500Model,
[DirectiveE5500, FeatureMFOCRF, Feature64Bit,
- FeatureSTFIWX, FeatureICBT, FeatureBookE,
+ FeatureSTFIWX, FeatureICBT, FeatureBookE,
FeatureISEL, FeatureMFTB]>;
def : ProcessorModel<"a2", PPCA2Model,
[DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
@@ -428,7 +428,7 @@ def : ProcessorModel<"pwr6x", G5Model,
FeatureMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
-def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>;
+def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>;
def : Processor<"ppc", G3Itineraries, [Directive32, FeatureHardFloat,
FeatureMFTB]>;
def : Processor<"ppc32", G3Itineraries, [Directive32, FeatureHardFloat,
@@ -478,3 +478,9 @@ def PPC : Target {
let AssemblyParserVariants = [PPCAsmParserVariant];
let AllowRegisterRenaming = 1;
}
+
+//===----------------------------------------------------------------------===//
+// Pfm Counters
+//===----------------------------------------------------------------------===//
+
+include "PPCPfmCounters.td"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index a9da64cc216f..04aa3c9b1e22 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -158,23 +158,6 @@ public:
} // end anonymous namespace
-/// stripRegisterPrefix - This method strips the character prefix from a
-/// register name so that only the number is left. Used by for linux asm.
-static const char *stripRegisterPrefix(const char *RegName) {
- switch (RegName[0]) {
- case 'r':
- case 'f':
- case 'q': // for QPX
- case 'v':
- if (RegName[1] == 's')
- return RegName + 2;
- return RegName + 1;
- case 'c': if (RegName[1] == 'r') return RegName + 2;
- }
-
- return RegName;
-}
-
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
const DataLayout &DL = getDataLayout();
@@ -182,27 +165,15 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
switch (MO.getType()) {
case MachineOperand::MO_Register: {
- unsigned Reg = MO.getReg();
-
- // There are VSX instructions that use VSX register numbering (vs0 - vs63)
- // as well as those that use VMX register numbering (v0 - v31 which
- // correspond to vs32 - vs63). If we have an instruction that uses VSX
- // numbering, we need to convert the VMX registers to VSX registers.
- // Namely, we print 32-63 when the instruction operates on one of the
- // VMX registers.
- // (Please synchronize with PPCInstPrinter::printOperand)
- if (MI->getDesc().TSFlags & PPCII::UseVSXReg) {
- if (PPCInstrInfo::isVRRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::V0);
- else if (PPCInstrInfo::isVFRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::VF0);
- }
+ unsigned Reg = PPCInstrInfo::getRegNumForOperand(MI->getDesc(),
+ MO.getReg(), OpNo);
+
const char *RegName = PPCInstPrinter::getRegisterName(Reg);
// Linux assembler (Others?) does not take register mnemonics.
// FIXME - What about special registers used in mfspr/mtspr?
if (!Subtarget->isDarwin())
- RegName = stripRegisterPrefix(RegName);
+ RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
O << RegName;
return;
}
@@ -279,6 +250,21 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
if (MI->getOperand(OpNo).isImm())
O << "i";
return false;
+ case 'x':
+ if(!MI->getOperand(OpNo).isReg())
+ return true;
+ // This operand uses VSX numbering.
+ // If the operand is a VMX register, convert it to a VSX register.
+ unsigned Reg = MI->getOperand(OpNo).getReg();
+ if (PPCInstrInfo::isVRRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::V0);
+ else if (PPCInstrInfo::isVFRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::VF0);
+ const char *RegName;
+ RegName = PPCInstPrinter::getRegisterName(Reg);
+ RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
+ O << RegName;
+ return false;
}
}
@@ -303,7 +289,7 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
{
const char *RegName = "r0";
if (!Subtarget->isDarwin())
- RegName = stripRegisterPrefix(RegName);
+ RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
O << RegName << ", ";
printOperand(MI, OpNo, O);
return false;
@@ -341,7 +327,7 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
}
void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) {
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
}
void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) {
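
The new 'x' output modifier gives inline assembly a way to request VSX numbering for an operand, matching GCC's %x<n> on PowerPC. A hedged user-level example (instruction and constraint choice are illustrative; build with VSX enabled, e.g. -mcpu=pwr8 -mvsx):

    // %x0/%x1/%x2 print as vs32-vs63 when the operands land in v0-v31.
    vector double vsx_add(vector double a, vector double b) {
      vector double r;
      __asm__("xvadddp %x0, %x1, %x2" : "=wa"(r) : "wa"(a), "wa"(b));
      return r;
    }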
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 12c581023234..22842d516e7d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -338,7 +338,7 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>;
// coldcc calling convection marks most registers as non-volatile.
// Do not include r1 since the stack pointer is never considered a CSR.
// Do not include r2, since it is the TOC register and is added depending
-// on wether or not the function uses the TOC and is a non-leaf.
+// on whether or not the function uses the TOC and is a non-leaf.
// Do not include r0,r11,r13 as they are optional in functional linkage
// and value may be altered by inter-library calls.
// Do not include r12 as it is used as a scratch register.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
index fe41e1b36a5d..a03e691ef5bb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
@@ -392,7 +392,7 @@ void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL,
// liveness state at the end of MBB (liveOut of MBB) as the liveIn for
// NewSuccessor. Otherwise, will cause cyclic dependence.
LivePhysRegs LPR(*MF->getSubtarget<PPCSubtarget>().getRegisterInfo());
- SmallVector<std::pair<unsigned, const MachineOperand *>, 2> Clobbers;
+ SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 2> Clobbers;
for (MachineInstr &MI : *MBB)
LPR.stepForward(MI, Clobbers);
for (auto &LI : LPR)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index f212894035db..3b2d92db78b9 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -861,8 +861,20 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
}
}
+ unsigned SrcReg1 = getRegForValue(SrcValue1);
+ if (SrcReg1 == 0)
+ return false;
+
+ unsigned SrcReg2 = 0;
+ if (!UseImm) {
+ SrcReg2 = getRegForValue(SrcValue2);
+ if (SrcReg2 == 0)
+ return false;
+ }
+
unsigned CmpOpc;
bool NeedsExt = false;
+ auto RC = MRI.getRegClass(SrcReg1);
switch (SrcVT.SimpleTy) {
default: return false;
case MVT::f32:
@@ -879,8 +891,15 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
CmpOpc = PPC::EFSCMPGT;
break;
}
- } else
+ } else {
CmpOpc = PPC::FCMPUS;
+ if (isVSSRCRegClass(RC)) {
+ unsigned TmpReg = createResultReg(&PPC::F4RCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg1);
+ SrcReg1 = TmpReg;
+ }
+ }
break;
case MVT::f64:
if (HasSPE) {
@@ -896,14 +915,17 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
CmpOpc = PPC::EFDCMPGT;
break;
}
- } else
+ } else if (isVSFRCRegClass(RC)) {
+ CmpOpc = PPC::XSCMPUDP;
+ } else {
CmpOpc = PPC::FCMPUD;
+ }
break;
case MVT::i1:
case MVT::i8:
case MVT::i16:
NeedsExt = true;
- // Intentional fall-through.
+ LLVM_FALLTHROUGH;
case MVT::i32:
if (!UseImm)
CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
@@ -918,17 +940,6 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
break;
}
- unsigned SrcReg1 = getRegForValue(SrcValue1);
- if (SrcReg1 == 0)
- return false;
-
- unsigned SrcReg2 = 0;
- if (!UseImm) {
- SrcReg2 = getRegForValue(SrcValue2);
- if (SrcReg2 == 0)
- return false;
- }
-
if (NeedsExt) {
unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
@@ -2354,7 +2365,8 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
return false;
- MI->eraseFromParent();
+ MachineBasicBlock::iterator I(MI);
+ removeDeadCode(I, std::next(I));
return true;
}
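
The reason the getRegForValue calls move to the top of PPCEmitCmp is that opcode selection now depends on the register class of the first source: a value living in a VSX register needs the VSX scalar compare, since the FPR-only forms cannot address vs32-vs63. Condensed from the f64 hunk above (names as in the patch):

    const TargetRegisterClass *RC = MRI.getRegClass(SrcReg1);
    unsigned CmpOpc = isVSFRCRegClass(RC) ? PPC::XSCMPUDP  // reads vs0-vs63
                                          : PPC::FCMPUD;   // FPRs only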
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 84dacf396462..8263954994d2 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -17,6 +17,7 @@
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,6 +29,16 @@
using namespace llvm;
+#define DEBUG_TYPE "framelowering"
+STATISTIC(NumNoNeedForFrame, "Number of functions without frames");
+STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
+STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
+
+static cl::opt<bool>
+EnablePEVectorSpills("ppc-enable-pe-vector-spills",
+ cl::desc("Enable spills in prologue to vector registers."),
+ cl::init(false), cl::Hidden);
+
/// VRRegNo - Map from a numbered VR register to its enum value.
///
static const MCPhysReg VRRegNo[] = {
@@ -466,6 +477,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Check whether we can skip adjusting the stack pointer (by using red zone)
if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
+ NumNoNeedForFrame++;
// No need for frame
if (UpdateMF)
MFI.setStackSize(0);
@@ -1213,11 +1225,20 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
continue;
}
- int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ if (CSI[I].isSpilledToReg()) {
+ unsigned SpilledReg = CSI[I].getDstReg();
+ unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
+ nullptr, MRI->getDwarfRegNum(Reg, true),
+ MRI->getDwarfRegNum(SpilledReg, true)));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIRegister);
+ } else {
+ int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
}
}
}
@@ -1822,17 +1843,19 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
// Move general register save area spill slots down, taking into account
// the size of the Floating-point register save area.
for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
- int FI = GPRegs[i].getFrameIdx();
-
- MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ if (!GPRegs[i].isSpilledToReg()) {
+ int FI = GPRegs[i].getFrameIdx();
+ MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ }
}
// Move general register save area spill slots down, taking into account
// the size of the Floating-point register save area.
for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
- int FI = G8Regs[i].getFrameIdx();
-
- MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ if (!G8Regs[i].isSpilledToReg()) {
+ int FI = G8Regs[i].getFrameIdx();
+ MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ }
}
unsigned MinReg =
@@ -1947,6 +1970,64 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
}
}
+// This function checks if a callee saved gpr can be spilled to a volatile
+// vector register. This occurs for leaf functions when the option
+// ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
+// which were not spilled to vectors, return false so the target independent
+// code can handle them by assigning a FrameIdx to a stack slot.
+bool PPCFrameLowering::assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
+
+ if (CSI.empty())
+ return true; // Early exit if no callee saved registers are modified!
+
+ // Early exit if cannot spill gprs to volatile vector registers.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
+ return false;
+
+ // Build a BitVector of VSRs that can be used for spilling GPRs.
+ BitVector BVAllocatable = TRI->getAllocatableSet(MF);
+ BitVector BVCalleeSaved(TRI->getNumRegs());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ BVCalleeSaved.set(CSRegs[i]);
+
+ for (unsigned Reg : BVAllocatable.set_bits()) {
+ // Set to 0 if the register is not a volatile VF/F8 register, or if it is
+ // used in the function.
+ if (BVCalleeSaved[Reg] ||
+ (!PPC::F8RCRegClass.contains(Reg) &&
+ !PPC::VFRCRegClass.contains(Reg)) ||
+ (MF.getRegInfo().isPhysRegUsed(Reg)))
+ BVAllocatable.reset(Reg);
+ }
+
+ bool AllSpilledToReg = true;
+ for (auto &CS : CSI) {
+ if (BVAllocatable.none())
+ return false;
+
+ unsigned Reg = CS.getReg();
+ if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
+ AllSpilledToReg = false;
+ continue;
+ }
+
+ unsigned VolatileVFReg = BVAllocatable.find_first();
+ if (VolatileVFReg < BVAllocatable.size()) {
+ CS.setDstReg(VolatileVFReg);
+ BVAllocatable.reset(VolatileVFReg);
+ } else {
+ AllSpilledToReg = false;
+ }
+ }
+ return AllSpilledToReg;
+}
+
+
bool
PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
@@ -2012,12 +2093,18 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
CSI[i].getFrameIdx()));
}
} else {
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- // Use !IsLiveIn for the kill flag.
- // We do not want to kill registers that are live in this function
- // before their use because they will become undefined registers.
- TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
- CSI[i].getFrameIdx(), RC, TRI);
+ if (CSI[i].isSpilledToReg()) {
+ NumPESpillVSR++;
+ BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
+ .addReg(Reg, getKillRegState(true));
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ // Use !IsLiveIn for the kill flag.
+ // We do not want to kill registers that are live in this function
+ // before their use because they will become undefined registers.
+ TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
+ CSI[i].getFrameIdx(), RC, TRI);
+ }
}
}
return true;
@@ -2157,13 +2244,19 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
CR2Spilled = CR3Spilled = CR4Spilled = false;
}
- // Default behavior for non-CR saves.
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(),
- RC, TRI);
- assert(I != MBB.begin() &&
- "loadRegFromStackSlot didn't insert any code!");
+ if (CSI[i].isSpilledToReg()) {
+ DebugLoc DL;
+ NumPEReloadVSR++;
+ BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
+ .addReg(CSI[i].getDstReg(), getKillRegState(true));
+ } else {
+ // Default behavior for non-CR saves.
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ assert(I != MBB.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
}
+ }
// Insert in reverse order.
if (AtStart)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index 01c155594c44..69bd1484d6e5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -99,6 +99,13 @@ public:
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const override;
+ /// This function will assign callee-saved GPRs to volatile vector registers
+ /// for prologue spills when applicable. It returns false if there are any
+ /// registers which were not spilled to volatile vector registers.
+ bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 793a4dd7f624..5f6966cecd61 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -103,7 +103,7 @@ bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
case PPC::Sched::IIC_LdStLHA:
case PPC::Sched::IIC_LdStLHAU:
case PPC::Sched::IIC_LdStLWA:
- case PPC::Sched::IIC_LdStSTDU:
+ case PPC::Sched::IIC_LdStSTU:
case PPC::Sched::IIC_LdStSTFDU:
NSlots = 2;
break;
@@ -112,7 +112,7 @@ bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
case PPC::Sched::IIC_LdStLHAUX:
case PPC::Sched::IIC_LdStLWARX:
case PPC::Sched::IIC_LdStLDARX:
- case PPC::Sched::IIC_LdStSTDUX:
+ case PPC::Sched::IIC_LdStSTUX:
case PPC::Sched::IIC_LdStSTDCX:
case PPC::Sched::IIC_LdStSTWCX:
case PPC::Sched::IIC_BrMCRX: // mtcr
@@ -180,9 +180,8 @@ void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) {
CurGroup.clear();
CurSlots = CurBranches = 0;
} else {
- LLVM_DEBUG(dbgs() << "**** Adding to dispatch group: SU(" << SU->NodeNum
- << "): ");
- LLVM_DEBUG(DAG->dumpNode(SU));
+ LLVM_DEBUG(dbgs() << "**** Adding to dispatch group: ");
+ LLVM_DEBUG(DAG->dumpNode(*SU));
unsigned NSlots;
bool MustBeFirst = mustComeFirst(MCID, NSlots);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 6cec664d1e66..31acd0ff870f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -81,6 +81,8 @@ STATISTIC(NumLogicOpsOnComparison,
"Number of logical ops on i1 values calculated in GPR.");
STATISTIC(OmittedForNonExtendUses,
"Number of compares not eliminated as they have non-extending uses.");
+STATISTIC(NumP9Setb,
+ "Number of compares lowered to setb.");
// FIXME: Remove this once the bug has been fixed!
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
@@ -327,7 +329,6 @@ private:
bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
void transferMemOperands(SDNode *N, SDNode *Result);
- MachineSDNode *flipSignBit(const SDValue &N, SDNode **SignBit = nullptr);
};
} // end anonymous namespace
@@ -490,7 +491,7 @@ static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
- const TerminatorInst *BBTerm = BB->getTerminator();
+ const Instruction *BBTerm = BB->getTerminator();
if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
@@ -687,9 +688,8 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
SDValue Op1 = N->getOperand(1);
SDLoc dl(N);
- KnownBits LKnown, RKnown;
- CurDAG->computeKnownBits(Op0, LKnown);
- CurDAG->computeKnownBits(Op1, RKnown);
+ KnownBits LKnown = CurDAG->computeKnownBits(Op0);
+ KnownBits RKnown = CurDAG->computeKnownBits(Op1);
unsigned TargetMask = LKnown.Zero.getZExtValue();
unsigned InsertMask = RKnown.Zero.getZExtValue();
@@ -733,8 +733,7 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
// The AND mask might not be a constant, and we need to make sure that
// if we're going to fold the masking with the insert, all bits not
// know to be zero in the mask are known to be one.
- KnownBits MKnown;
- CurDAG->computeKnownBits(Op1.getOperand(1), MKnown);
+ KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
unsigned SHOpc = Op1.getOperand(0).getOpcode();
@@ -1083,9 +1082,14 @@ class BitPermutationSelector {
// lowest-order bit.
unsigned Idx;
+ // ConstZero means a bit we need to mask off.
+ // Variable is a bit that comes from an input variable.
+ // VariableKnownToBeZero is also a bit that comes from an input variable,
+ // but it is known to be already zero, so we do not need to mask it.
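+ // For example, the bits above the memory width of a zero-extending load
+ // are VariableKnownToBeZero (see the ISD::LOAD case below).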
enum Kind {
ConstZero,
- Variable
+ Variable,
+ VariableKnownToBeZero
} K;
ValueBit(SDValue V, unsigned I, Kind K = Variable)
@@ -1094,11 +1098,11 @@ class BitPermutationSelector {
: V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
bool isZero() const {
- return K == ConstZero;
+ return K == ConstZero || K == VariableKnownToBeZero;
}
bool hasValue() const {
- return K == Variable;
+ return K == Variable || K == VariableKnownToBeZero;
}
SDValue getValue() const {
@@ -1248,8 +1252,14 @@ class BitPermutationSelector {
for (unsigned i = 0; i < NumBits; ++i)
if (((Mask >> i) & 1) == 1)
Bits[i] = (*LHSBits)[i];
- else
- Bits[i] = ValueBit(ValueBit::ConstZero);
+ else {
+ // The AND instruction masks this bit. If the input is already zero,
+ // we have nothing to do here; otherwise, make the bit ConstZero.
+ if ((*LHSBits)[i].isZero())
+ Bits[i] = (*LHSBits)[i];
+ else
+ Bits[i] = ValueBit(ValueBit::ConstZero);
+ }
return std::make_pair(Interesting, &Bits);
}
@@ -1259,8 +1269,26 @@ class BitPermutationSelector {
const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
bool AllDisjoint = true;
- for (unsigned i = 0; i < NumBits; ++i)
- if (LHSBits[i].isZero())
+ SDValue LastVal = SDValue();
+ unsigned LastIdx = 0;
+ for (unsigned i = 0; i < NumBits; ++i) {
+ if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
+ // If both inputs are known to be zero, and one is ConstZero while
+ // the other is VariableKnownToBeZero, we can select either. To
+ // minimize the number of bit groups, we select VariableKnownToBeZero
+ // if this bit is the next bit of the same input variable as the
+ // previous bit; otherwise, we select ConstZero.
+ if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
+ LHSBits[i].getValueBitIndex() == LastIdx + 1)
+ Bits[i] = LHSBits[i];
+ else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
+ RHSBits[i].getValueBitIndex() == LastIdx + 1)
+ Bits[i] = RHSBits[i];
+ else
+ Bits[i] = ValueBit(ValueBit::ConstZero);
+ }
+ else if (LHSBits[i].isZero())
Bits[i] = RHSBits[i];
else if (RHSBits[i].isZero())
Bits[i] = LHSBits[i];
@@ -1268,6 +1296,16 @@ class BitPermutationSelector {
AllDisjoint = false;
break;
}
+ // We remember the value and bit index of this bit.
+ if (Bits[i].hasValue()) {
+ LastVal = Bits[i].getValue();
+ LastIdx = Bits[i].getValueBitIndex();
+ }
+ else {
+ if (LastVal) LastVal = SDValue();
+ LastIdx = 0;
+ }
+ }
if (!AllDisjoint)
break;
@@ -1293,6 +1331,72 @@ class BitPermutationSelector {
return std::make_pair(Interesting, &Bits);
}
+ case ISD::TRUNCATE: {
+ EVT FromType = V.getOperand(0).getValueType();
+ EVT ToType = V.getValueType();
+ // We support only truncation from i64 to i32.
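+ // For example, (trunc i64:%a to i32) simply forwards the low 32
+ // ValueBits of %a.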
+ if (FromType != MVT::i64 || ToType != MVT::i32)
+ break;
+ const unsigned NumAllBits = FromType.getSizeInBits();
+ SmallVector<ValueBit, 64> *InBits;
+ std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
+ NumAllBits);
+ const unsigned NumValidBits = ToType.getSizeInBits();
+
+ // A 32-bit instruction cannot touch the upper 32-bit part of a 64-bit
+ // value, so we cannot include this truncate.
+ bool UseUpper32bit = false;
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
+ UseUpper32bit = true;
+ break;
+ }
+ if (UseUpper32bit)
+ break;
+
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ Bits[i] = (*InBits)[i];
+
+ return std::make_pair(Interesting, &Bits);
+ }
+ case ISD::AssertZext: {
+ // For AssertZext, we look through the operand and
+ // mark the bits known to be zero.
+ const SmallVector<ValueBit, 64> *LHSBits;
+ std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
+ NumBits);
+
+ EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
+ const unsigned NumValidBits = FromType.getSizeInBits();
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ Bits[i] = (*LHSBits)[i];
+
+ // These bits are known to be zero.
+ for (unsigned i = NumValidBits; i < NumBits; ++i)
+ Bits[i] = ValueBit((*LHSBits)[i].getValue(),
+ (*LHSBits)[i].getValueBitIndex(),
+ ValueBit::VariableKnownToBeZero);
+
+ return std::make_pair(Interesting, &Bits);
+ }
+ case ISD::LOAD:
+ LoadSDNode *LD = cast<LoadSDNode>(V);
+ if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
+ EVT VT = LD->getMemoryVT();
+ const unsigned NumValidBits = VT.getSizeInBits();
+
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ Bits[i] = ValueBit(V, i);
+
+ // These bits are known to be zero.
+ for (unsigned i = NumValidBits; i < NumBits; ++i)
+ Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
+
+ // A zero-extending load itself cannot be optimized, so it is not
+ // interesting by itself, though it provides useful information.
+ return std::make_pair(Interesting = false, &Bits);
+ }
+ break;
}
for (unsigned i = 0; i < NumBits; ++i)
@@ -1304,7 +1408,7 @@ class BitPermutationSelector {
// For each value (except the constant ones), compute the left-rotate amount
// to get it from its original to final position.
void computeRotationAmounts() {
- HasZeros = false;
+ NeedMask = false;
RLAmt.resize(Bits.size());
for (unsigned i = 0; i < Bits.size(); ++i)
if (Bits[i].hasValue()) {
@@ -1314,7 +1418,7 @@ class BitPermutationSelector {
else
RLAmt[i] = Bits.size() - (VBI - i);
} else if (Bits[i].isZero()) {
- HasZeros = true;
+ NeedMask = true;
RLAmt[i] = UINT32_MAX;
} else {
llvm_unreachable("Unknown value bit type");
@@ -1330,6 +1434,7 @@ class BitPermutationSelector {
unsigned LastRLAmt = RLAmt[0];
SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
unsigned LastGroupStartIdx = 0;
+ bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
for (unsigned i = 1; i < Bits.size(); ++i) {
unsigned ThisRLAmt = RLAmt[i];
SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
@@ -1342,10 +1447,20 @@ class BitPermutationSelector {
LastGroupStartIdx = 0;
}
+ // If this bit is known to be zero and the current group is a bit group
+ // of zeros, we do not need to terminate the current bit group even if
+ // the Value or RLAmt does not match here. Instead, we terminate this
+ // group when the first non-zero bit appears later.
+ if (IsGroupOfZeros && Bits[i].isZero())
+ continue;
+
// If this bit has the same underlying value and the same rotate factor as
// the last one, then they're part of the same group.
if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
- continue;
+ // Within a bit group of zeros, we cannot continue the group if this
+ // bit is not known to be zero.
+ if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
+ continue;
if (LastValue.getNode())
BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
@@ -1353,6 +1468,7 @@ class BitPermutationSelector {
LastRLAmt = ThisRLAmt;
LastValue = ThisValue;
LastGroupStartIdx = i;
+ IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
}
if (LastValue.getNode())
BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
@@ -1401,7 +1517,7 @@ class BitPermutationSelector {
for (auto &I : ValueRots) {
ValueRotsVec.push_back(I.second);
}
- llvm::sort(ValueRotsVec.begin(), ValueRotsVec.end());
+ llvm::sort(ValueRotsVec);
}
// In 64-bit mode, rlwinm and friends have a rotation operator that
@@ -1588,6 +1704,17 @@ class BitPermutationSelector {
return ExtVal;
}
+ SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
+ if (V.getValueSizeInBits() == 32)
+ return V;
+
+ assert(V.getValueSizeInBits() == 64);
+ SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
+ SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
+ MVT::i32, V, SubRegIdx), 0);
+ return SubVal;
+ }
+
// Depending on the number of groups for a particular value, it might be
// better to rotate, mask explicitly (using andi/andis), and then or the
// result. Select this part of the result first.
@@ -1646,12 +1773,12 @@ class BitPermutationSelector {
SDValue VRot;
if (VRI.RLAmt) {
SDValue Ops[] =
- { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
- getI32Imm(31, dl) };
+ { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
+ getI32Imm(0, dl), getI32Imm(31, dl) };
VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
Ops), 0);
} else {
- VRot = VRI.V;
+ VRot = TruncateToInt32(VRI.V, dl);
}
SDValue ANDIVal, ANDISVal;
@@ -1698,17 +1825,17 @@ class BitPermutationSelector {
// If we've not yet selected a 'starting' instruction, and we have no zeros
// to fill in, select the (Value, RLAmt) with the highest priority (largest
// number of groups), and start with this rotated value.
- if ((!HasZeros || LateMask) && !Res) {
+ if ((!NeedMask || LateMask) && !Res) {
ValueRotInfo &VRI = ValueRotsVec[0];
if (VRI.RLAmt) {
if (InstCnt) *InstCnt += 1;
SDValue Ops[] =
- { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
- getI32Imm(31, dl) };
+ { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
+ getI32Imm(0, dl), getI32Imm(31, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
0);
} else {
- Res = VRI.V;
+ Res = TruncateToInt32(VRI.V, dl);
}
// Now, remove all groups with this underlying value and rotation factor.
@@ -1723,13 +1850,13 @@ class BitPermutationSelector {
for (auto &BG : BitGroups) {
if (!Res) {
SDValue Ops[] =
- { BG.V, getI32Imm(BG.RLAmt, dl),
+ { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
} else {
SDValue Ops[] =
- { Res, BG.V, getI32Imm(BG.RLAmt, dl),
+ { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
@@ -2077,7 +2204,7 @@ class BitPermutationSelector {
// If we've not yet selected a 'starting' instruction, and we have no zeros
// to fill in, select the (Value, RLAmt) with the highest priority (largest
// number of groups), and start with this rotated value.
- if ((!HasZeros || LateMask) && !Res) {
+ if ((!NeedMask || LateMask) && !Res) {
// If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
// groups will come first, and so the VRI representing the largest number
// of groups might not be first (it might be the first Repl32 groups).
@@ -2230,7 +2357,7 @@ class BitPermutationSelector {
SmallVector<ValueBit, 64> Bits;
- bool HasZeros;
+ bool NeedMask;
SmallVector<unsigned, 64> RLAmt;
SmallVector<BitGroup, 16> BitGroups;
@@ -2259,10 +2386,10 @@ public:
" selection for: ");
LLVM_DEBUG(N->dump(CurDAG));
- // Fill it RLAmt and set HasZeros.
+ // Fill it RLAmt and set NeedMask.
computeRotationAmounts();
- if (!HasZeros)
+ if (!NeedMask)
return Select(N, false);
// We currently have two techniques for handling results with zeros: early
@@ -4045,54 +4172,148 @@ bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
-/// This method returns a node after flipping the MSB of each element
-/// of vector integer type. Additionally, if SignBitVec is non-null,
-/// this method sets a node with one at MSB of all elements
-/// and zero at other bits in SignBitVec.
-MachineSDNode *
-PPCDAGToDAGISel::flipSignBit(const SDValue &N, SDNode **SignBitVec) {
- SDLoc dl(N);
- EVT VecVT = N.getValueType();
- if (VecVT == MVT::v4i32) {
- if (SignBitVec) {
- SDNode *ZV = CurDAG->getMachineNode(PPC::V_SET0, dl, MVT::v4i32);
- *SignBitVec = CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT,
- SDValue(ZV, 0));
- }
- return CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT, N);
- }
- else if (VecVT == MVT::v8i16) {
- SDNode *Hi = CurDAG->getMachineNode(PPC::LIS, dl, MVT::i32,
- getI32Imm(0x8000, dl));
- SDNode *ScaImm = CurDAG->getMachineNode(PPC::ORI, dl, MVT::i32,
- SDValue(Hi, 0),
- getI32Imm(0x8000, dl));
- SDNode *VecImm = CurDAG->getMachineNode(PPC::MTVSRWS, dl, VecVT,
- SDValue(ScaImm, 0));
- /*
- Alternatively, we can do this as follow to use VRF instead of GPR.
- vspltish 5, 1
- vspltish 6, 15
- vslh 5, 6, 5
- */
- if (SignBitVec) *SignBitVec = VecImm;
- return CurDAG->getMachineNode(PPC::VADDUHM, dl, VecVT, N,
- SDValue(VecImm, 0));
- }
- else if (VecVT == MVT::v16i8) {
- SDNode *VecImm = CurDAG->getMachineNode(PPC::XXSPLTIB, dl, MVT::i32,
- getI32Imm(0x80, dl));
- if (SignBitVec) *SignBitVec = VecImm;
- return CurDAG->getMachineNode(PPC::VADDUBM, dl, VecVT, N,
- SDValue(VecImm, 0));
+static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
+ bool &NeedSwapOps, bool &IsUnCmp) {
+
+ assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue TrueRes = N->getOperand(2);
+ SDValue FalseRes = N->getOperand(3);
+ ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
+ if (!TrueConst)
+ return false;
+
+ assert((N->getSimpleValueType(0) == MVT::i64 ||
+ N->getSimpleValueType(0) == MVT::i32) &&
+ "Expecting either i64 or i32 here.");
+
+ // We are looking for any of:
+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
+ // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
+ // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
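+ // The POWER9 setb instruction materializes -1, 1, or 0 depending on
+ // whether the LT, GT, or neither bit of the CR field is set, which matches
+ // the values produced by the trees above.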
+ int64_t TrueResVal = TrueConst->getSExtValue();
+ if ((TrueResVal < -1 || TrueResVal > 1) ||
+ (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
+ (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
+ (TrueResVal == 0 &&
+ (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
+ return false;
+
+ bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC;
+ SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0);
+ if (SetOrSelCC.getOpcode() != ISD::SETCC &&
+ SetOrSelCC.getOpcode() != ISD::SELECT_CC)
+ return false;
+
+ // Without this setb optimization, the outer SELECT_CC will be manually
+ // selected to a SELECT_CC_I4/SELECT_CC_I8 pseudo, which the
+ // expand-isel-pseudos pass then transforms into an isel instruction. When
+ // the result has more than one use (e.g. a zext/sext), this optimization
+ // would merely replace the isel with a setb without any significant gain.
+ // Since setb has a longer latency than the original isel, we should avoid
+ // that. Another point is that setb requires the comparison to be kept
+ // alive, which can block a future opportunity to eliminate the comparison.
+ if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
+ return false;
+
+ SDValue InnerLHS = SetOrSelCC.getOperand(0);
+ SDValue InnerRHS = SetOrSelCC.getOperand(1);
+ ISD::CondCode InnerCC =
+ cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
+ // If the inner comparison is a select_cc, make sure the true/false values are
+ // 1/-1 and canonicalize it if needed.
+ if (InnerIsSel) {
+ ConstantSDNode *SelCCTrueConst =
+ dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
+ ConstantSDNode *SelCCFalseConst =
+ dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
+ if (!SelCCTrueConst || !SelCCFalseConst)
+ return false;
+ int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
+ int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
+ // The values must be -1/1 (requiring a swap) or 1/-1.
+ if (SelCCTVal == -1 && SelCCFVal == 1) {
+ std::swap(InnerLHS, InnerRHS);
+ } else if (SelCCTVal != 1 || SelCCFVal != -1)
+ return false;
}
- else
- llvm_unreachable("Unsupported vector data type for flipSignBit");
+
+ // Canonicalize unsigned case
+ if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
+ IsUnCmp = true;
+ InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
+ }
+
+ bool InnerSwapped = false;
+ if (LHS == InnerRHS && RHS == InnerLHS)
+ InnerSwapped = true;
+ else if (LHS != InnerLHS || RHS != InnerRHS)
+ return false;
+
+ switch (CC) {
+ // (select_cc lhs, rhs, 0, \
+ // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
+ case ISD::SETEQ:
+ if (!InnerIsSel)
+ return false;
+ if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
+ return false;
+ NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
+ break;
+
+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
+ // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
+ // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
+ // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
+ // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
+ case ISD::SETULT:
+ if (!IsUnCmp && InnerCC != ISD::SETNE)
+ return false;
+ IsUnCmp = true;
+ LLVM_FALLTHROUGH;
+ case ISD::SETLT:
+ if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
+ (InnerCC == ISD::SETLT && InnerSwapped))
+ NeedSwapOps = (TrueResVal == 1);
+ else
+ return false;
+ break;
+
+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
+ // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
+ // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
+ // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
+ // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
+ case ISD::SETUGT:
+ if (!IsUnCmp && InnerCC != ISD::SETNE)
+ return false;
+ IsUnCmp = true;
+ LLVM_FALLTHROUGH;
+ case ISD::SETGT:
+ if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
+ (InnerCC == ISD::SETGT && InnerSwapped))
+ NeedSwapOps = (TrueResVal == -1);
+ else
+ return false;
+ break;
+
+ default:
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
+ LLVM_DEBUG(N->dump());
+
+ return true;
}
// Select - Convert the specified operand from a target-independent to a
@@ -4429,8 +4650,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
int16_t Imm;
if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
isIntS16Immediate(N->getOperand(1), Imm)) {
- KnownBits LHSKnown;
- CurDAG->computeKnownBits(N->getOperand(0), LHSKnown);
+ KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
// If this is equivalent to an add, then we can fold it with the
// FrameIndex calculation.
@@ -4557,6 +4777,31 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
N->getOperand(0).getValueType() == MVT::i1)
break;
+ if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) {
+ bool NeedSwapOps = false;
+ bool IsUnCmp = false;
+ if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (NeedSwapOps)
+ std::swap(LHS, RHS);
+
+ // Make use of SelectCC to generate the comparison that sets the CR bits.
+ // For equality comparisons with one literal operand, SelectCC may avoid
+ // materializing the whole literal and instead use xoris to check it
+ // first, in which case the resulting comparison does not exactly
+ // represent the GT/LT relationship. To avoid this we specify
+ // SETGT/SETUGT here instead of SETEQ.
+ SDValue GenCC =
+ SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
+ CurDAG->SelectNodeTo(
+ N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
+ N->getValueType(0), GenCC);
+ NumP9Setb++;
+ return;
+ }
+ }
+
// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
if (!isPPC64)
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
@@ -4648,14 +4893,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
return;
}
- case ISD::VSELECT:
- if (PPCSubTarget->hasVSX()) {
- SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
- CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
- return;
- }
- break;
-
case ISD::VECTOR_SHUFFLE:
if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)) {
@@ -4683,11 +4920,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
SDValue Chain = LD->getChain();
SDValue Ops[] = { Base, Offset, Chain };
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
+ MachineMemOperand *MemOp = LD->getMemOperand();
SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
N->getValueType(0), Ops);
- cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
return;
}
}
@@ -4753,6 +4989,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
}
+ // A signed comparison of i1 values produces the opposite result to an
+ // unsigned one if the condition code includes less-than or greater-than.
+ // This is because 1 is the most negative signed i1 number and the most
+ // positive unsigned i1 number. The CR-logical operations used for such
+ // comparisons are non-commutative so for signed comparisons vs. unsigned
+ // ones, the input operands just need to be swapped.
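+ // For example, a signed setlt on i1 values computes the same predicate as
+ // an unsigned setgt, because i1 1 is -1 when interpreted as signed.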
+ if (ISD::isSignedIntSetCC(CC))
+ Swap = !Swap;
+
SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
N->getOperand(Swap ? 3 : 2),
N->getOperand(Swap ? 2 : 3)), 0);
@@ -4809,9 +5054,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SDValue TOCbase = N->getOperand(1);
SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
TOCbase, GA);
-
- if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
- CModel == CodeModel::Large) {
+ if (PPCLowering->isAccessedAsGotIndirect(GA)) {
+ // If it is accessed as got-indirect, we need an extra LD to load
+ // the address.
SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
transferMemOperands(N, MN);
@@ -4819,18 +5064,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
- const GlobalValue *GV = G->getGlobal();
- unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
- if (GVFlags & PPCII::MO_NLP_FLAG) {
- SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
- SDValue(Tmp, 0));
- transferMemOperands(N, MN);
- ReplaceNode(N, MN);
- return;
- }
- }
-
+ // Build the address relative to the TOC pointer.
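+ // i.e. an addis/addi pair (ADDIStocHA/ADDItocL) computing the address
+ // from the TOC base, with no load required.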
ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
SDValue(Tmp, 0), GA));
return;
@@ -4916,55 +5150,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
}
- case ISD::ABS: {
- assert(PPCSubTarget->hasP9Vector() && "ABS is supported with P9 Vector");
-
- // For vector absolute difference, we use VABSDUW instruction of POWER9.
- // Since VABSDU instructions are for unsigned integers, we need adjustment
- // for signed integers.
- // For abs(sub(a, b)), we generate VABSDUW(a+0x80000000, b+0x80000000).
- // Otherwise, abs(sub(-1, 0)) returns 0xFFFFFFFF(=-1) instead of 1.
- // For abs(a), we generate VABSDUW(a+0x80000000, 0x80000000).
- EVT VecVT = N->getOperand(0).getValueType();
- SDNode *AbsOp = nullptr;
- unsigned AbsOpcode;
-
- if (VecVT == MVT::v4i32)
- AbsOpcode = PPC::VABSDUW;
- else if (VecVT == MVT::v8i16)
- AbsOpcode = PPC::VABSDUH;
- else if (VecVT == MVT::v16i8)
- AbsOpcode = PPC::VABSDUB;
- else
- llvm_unreachable("Unsupported vector data type for ISD::ABS");
-
- // Even for signed integers, we can skip adjustment if all values are
- // known to be positive (as signed integer) due to zero-extended inputs.
- if (N->getOperand(0).getOpcode() == ISD::SUB &&
- N->getOperand(0)->getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
- N->getOperand(0)->getOperand(1).getOpcode() == ISD::ZERO_EXTEND) {
- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
- SDValue(N->getOperand(0)->getOperand(0)),
- SDValue(N->getOperand(0)->getOperand(1)));
- ReplaceNode(N, AbsOp);
- return;
- }
- if (N->getOperand(0).getOpcode() == ISD::SUB) {
- SDValue SubVal = N->getOperand(0);
- SDNode *Op0 = flipSignBit(SubVal->getOperand(0));
- SDNode *Op1 = flipSignBit(SubVal->getOperand(1));
- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
- SDValue(Op0, 0), SDValue(Op1, 0));
- }
- else {
- SDNode *Op1 = nullptr;
- SDNode *Op0 = flipSignBit(N->getOperand(0), &Op1);
- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, SDValue(Op0, 0),
- SDValue(Op1, 0));
- }
- ReplaceNode(N, AbsOp);
- return;
- }
}
SelectCode(N);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index b5bdf47ce37a..39608cb74bee 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -251,12 +251,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::UREM, MVT::i64, Expand);
}
- if (Subtarget.hasP9Vector()) {
- setOperationAction(ISD::ABS, MVT::v4i32, Legal);
- setOperationAction(ISD::ABS, MVT::v8i16, Legal);
- setOperationAction(ISD::ABS, MVT::v16i8, Legal);
- }
-
// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
@@ -323,12 +317,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// to speed up scalar BSWAP64.
// CTPOP or CTTZ were introduced in P8/P9 respectively
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
- if (Subtarget.isISA3_0()) {
+ if (Subtarget.hasP9Vector())
setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
+ else
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
+ if (Subtarget.isISA3_0()) {
setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
} else {
- setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
}
@@ -554,6 +550,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
+ setOperationAction(ISD::ABS, VT, Custom);
// Vector instructions introduced in P8
if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
@@ -586,6 +583,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
+ setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::SELECT_CC, VT, Promote);
AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
setOperationAction(ISD::STORE, VT, Promote);
@@ -626,7 +624,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
- setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
@@ -659,6 +656,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+ // Without hasP8Altivec set, v2i64 SMAX isn't available.
+ // But ABS custom lowering requires SMAX support.
+ if (!Subtarget.hasP8Altivec())
+ setOperationAction(ISD::ABS, MVT::v2i64, Expand);
+
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
@@ -727,12 +729,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
- setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
- setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
- setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
- setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
- setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
-
// Share the Altivec comparison restrictions.
setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
@@ -792,12 +788,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
- // Vector operation legalization checks the result type of
- // SIGN_EXTEND_INREG, overall legalization checks the inner type.
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
+ // Custom handling for partial vectors of integers converted to
+ // floating point. We already have optimal handling for v2i32 through
+ // the DAG combine, so that type does not need custom handling here.
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
@@ -1055,6 +1056,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
@@ -1076,6 +1078,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::TRUNCATE);
+
if (Subtarget.useCRBits()) {
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SETCC);
@@ -1088,6 +1092,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::FSQRT);
}
+ if (Subtarget.hasP9Altivec()) {
+ setTargetDAGCombine(ISD::ABS);
+ setTargetDAGCombine(ISD::VSELECT);
+ }
+
// Darwin long double math library functions have $LDBL128 appended.
if (Subtarget.isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
@@ -1348,6 +1357,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::RFEBB: return "PPCISD::RFEBB";
case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
+ case PPCISD::VABSD: return "PPCISD::VABSD";
case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
@@ -1355,6 +1365,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::QBFLT: return "PPCISD::QBFLT";
case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
+ case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
}
return nullptr;
}
@@ -2214,11 +2225,10 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are provably
// disjoint.
- KnownBits LHSKnown, RHSKnown;
- DAG.computeKnownBits(N.getOperand(0), LHSKnown);
+ KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
if (LHSKnown.Zero.getBoolValue()) {
- DAG.computeKnownBits(N.getOperand(1), RHSKnown);
+ KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
@@ -2317,8 +2327,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
- KnownBits LHSKnown;
- DAG.computeKnownBits(N.getOperand(0), LHSKnown);
+ KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
@@ -2405,6 +2414,28 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
return true;
}
+/// Returns true if we should use a direct load-into-vector instruction
+/// (such as lxsd or lfd) instead of a load into a GPR plus a direct move.
+static bool usePartialVectorLoads(SDNode *N) {
+ if (!N->hasOneUse())
+ return false;
+
+ // If there are any uses other than scalar-to-vector, we should keep it as
+ // a scalar load -> direct move pattern to prevent multiple loads.
+ // Currently, we only check for i64 since we have lxsd/lfd to do this
+ // efficiently, but no update-form equivalent.
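+ // For example, (scalar_to_vector (load i64)) can be selected as a single
+ // lxsd/lfd instead of a load followed by a direct move.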
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ EVT MemVT = LD->getMemoryVT();
+ if (MemVT.isSimple() && MemVT.getSimpleVT().SimpleTy == MVT::i64) {
+ SDNode *User = *(LD->use_begin());
+ if (User->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return true;
+ }
+ }
+
+ return false;
+}
+
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
@@ -2430,6 +2461,13 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
} else
return false;
+ // Do not generate pre-inc forms for specific loads that feed
+ // scalar_to_vector instructions, because we can fold these into a more
+ // efficient instruction instead (such as LXSD).
+ if (isLoad && usePartialVectorLoads(N)) {
+ return false;
+ }
+
// PowerPC doesn't have preinc load/store instructions for vectors (except
// for QPX, which does have preinc r+r forms).
if (VT.isVector()) {
@@ -2674,7 +2712,8 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual BlockAddress is stored in the TOC.
- if (Subtarget.isSVR4ABI() && isPositionIndependent()) {
+ if (Subtarget.isSVR4ABI() &&
+ (Subtarget.isPPC64() || isPositionIndependent())) {
if (Subtarget.isPPC64())
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
@@ -3480,9 +3519,14 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
// Argument stored in memory.
assert(VA.isMemLoc());
+ // Get the extended size of the argument type on the stack.
unsigned ArgSize = VA.getLocVT().getStoreSize();
- int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(),
- isImmutable);
+ // Get the actual size of the argument type
+ unsigned ObjSize = VA.getValVT().getStoreSize();
+ unsigned ArgOffset = VA.getLocMemOffset();
+ // Stack objects in PPC32 are right justified.
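+ // e.g. an i8 argument occupies the last byte of its 4-byte slot, so
+ // its offset within the slot is 3.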
+ ArgOffset += ArgSize - ObjSize;
+ int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@@ -3935,7 +3979,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
"Invalid QPX parameter type");
- /* fall through */
+ LLVM_FALLTHROUGH;
case MVT::v4f64:
case MVT::v4i1:
@@ -5053,9 +5097,15 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
// All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
// into the call.
- if (isSVR4ABI && isPPC64 && !isPatchPoint) {
+ // We do need to reserve X2 to appease the verifier for the PATCHPOINT.
+ if (isSVR4ABI && isPPC64) {
setUsesTOCBasePtr(DAG);
- Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+
+ // We cannot add X2 as an operand here for PATCHPOINT, because there is no
+ // way to mark dependencies as implicit here. We will add the X2 dependency
+ // in EmitInstrWithCustomInserter.
+ if (!isPatchPoint)
+ Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
}
return CallOpc;
@@ -5437,10 +5487,15 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
Arg = PtrOff;
}
- if (VA.isRegLoc()) {
- if (Arg.getValueType() == MVT::i1)
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
+ // When useCRBits() is true, there can be i1 arguments.
+ // This is because getRegisterType(MVT::i1) => MVT::i1,
+ // while for other integer types getRegisterType() => MVT::i32.
+ // Extend i1 arguments and ensure the callee gets an i32.
+ if (Arg.getValueType() == MVT::i1)
+ Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, MVT::i32, Arg);
+ if (VA.isRegLoc()) {
seenFloatArg |= VA.getLocVT().isFloatingPoint();
// Put argument in a physical register.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
@@ -6073,7 +6128,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
"Invalid QPX parameter type");
- /* fall through */
+ LLVM_FALLTHROUGH;
case MVT::v4f64:
case MVT::v4i1: {
bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
@@ -7228,10 +7283,83 @@ SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
return FP;
}
+static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
+
+ EVT VecVT = Vec.getValueType();
+ assert(VecVT.isVector() && "Expected a vector type.");
+ assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
+
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned WideNumElts = 128 / EltVT.getSizeInBits();
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
+
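+ // e.g. a v4i8 input is concatenated with three undef v4i8 vectors to
+ // produce a v16i8.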
+ unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = Vec;
+ SDValue UndefVec = DAG.getUNDEF(VecVT);
+ for (unsigned i = 1; i < NumConcat; ++i)
+ Ops[i] = UndefVec;
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
+}
+
+SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
+ const SDLoc &dl) const {
+
+ unsigned Opc = Op.getOpcode();
+ assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
+ "Unexpected conversion type");
+ assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
+ "Supports conversions to v2f64/v4f32 only.");
+
+ bool SignedConv = Opc == ISD::SINT_TO_FP;
+ bool FourEltRes = Op.getValueType() == MVT::v4f32;
+
+ SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
+ EVT WideVT = Wide.getValueType();
+ unsigned WideNumElts = WideVT.getVectorNumElements();
+ MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
+
+ SmallVector<int, 16> ShuffV;
+ for (unsigned i = 0; i < WideNumElts; ++i)
+ ShuffV.push_back(i + WideNumElts);
+
+ int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
+ int SaveElts = FourEltRes ? 4 : 2;
+ if (Subtarget.isLittleEndian())
+ for (int i = 0; i < SaveElts; i++)
+ ShuffV[i * Stride] = i;
+ else
+ for (int i = 1; i <= SaveElts; i++)
+ ShuffV[i * Stride - 1] = i - 1;
+
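+ // For example, widening v4i8 to v16i8 for a v4f32 result on little-endian
+ // gives Stride == 4, and the mask places input byte i at byte i*4 of each
+ // i32 lane, taking the remaining bytes from the second shuffle operand
+ // (zero for unsigned conversions, undef for signed ones).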
+ SDValue ShuffleSrc2 =
+ SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
+ SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
+ unsigned ExtendOp =
+ SignedConv ? (unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST;
+
+ SDValue Extend;
+ if (!Subtarget.hasP9Altivec() && SignedConv) {
+ Arrange = DAG.getBitcast(IntermediateVT, Arrange);
+ Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
+ DAG.getValueType(Op.getOperand(0).getValueType()));
+ } else
+ Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange);
+
+ return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
+}
+
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
+ EVT InVT = Op.getOperand(0).getValueType();
+ EVT OutVT = Op.getValueType();
+ if (OutVT.isVector() && OutVT.isFloatingPoint() &&
+ isOperationCustom(Op.getOpcode(), InVT))
+ return LowerINT_TO_FPVector(Op, DAG, dl);
+
// Conversions to f128 are legal.
if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
return Op;
@@ -8902,35 +9030,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getRegister(PPC::R2, MVT::i32);
}
- // We are looking for absolute values here.
- // The idea is to try to fit one of two patterns:
- // max (a, (0-a)) OR max ((0-a), a)
- if (Subtarget.hasP9Vector() &&
- (IntrinsicID == Intrinsic::ppc_altivec_vmaxsw ||
- IntrinsicID == Intrinsic::ppc_altivec_vmaxsh ||
- IntrinsicID == Intrinsic::ppc_altivec_vmaxsb)) {
- SDValue V1 = Op.getOperand(1);
- SDValue V2 = Op.getOperand(2);
- if (V1.getSimpleValueType() == V2.getSimpleValueType() &&
- (V1.getSimpleValueType() == MVT::v4i32 ||
- V1.getSimpleValueType() == MVT::v8i16 ||
- V1.getSimpleValueType() == MVT::v16i8)) {
- if ( V1.getOpcode() == ISD::SUB &&
- ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
- V1.getOperand(1) == V2 ) {
- // Generate the abs instruction with the operands
- return DAG.getNode(ISD::ABS, dl, V2.getValueType(),V2);
- }
-
- if ( V2.getOpcode() == ISD::SUB &&
- ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
- V2.getOperand(1) == V1 ) {
- // Generate the abs instruction with the operands
- return DAG.getNode(ISD::ABS, dl, V1.getValueType(),V1);
- }
- }
- }
-
// If this is a lowered altivec predicate compare, CompareOpc is set to the
// opcode number of the comparison.
int CompareOpc;
@@ -9081,30 +9180,6 @@ SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
}
-SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc dl(Op);
- // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int
- // instructions), but for smaller types, we need to first extend up to v2i32
- // before doing going farther.
- if (Op.getValueType() == MVT::v2i64) {
- EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- if (ExtVT != MVT::v2i32) {
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
- Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
- DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
- ExtVT.getVectorElementType(), 4)));
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
- Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
- DAG.getValueType(MVT::v2i32));
- }
-
- return Op;
- }
-
- return SDValue();
-}
-
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -9495,6 +9570,44 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
}
}
+SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
+
+ assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
+
+ EVT VT = Op.getValueType();
+ assert(VT.isVector() &&
+ "Only set vector abs as custom, scalar abs shouldn't reach here!");
+ assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
+ VT == MVT::v16i8) &&
+ "Unexpected vector element type!");
+ assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
+ "Current subtarget doesn't support smax v2i64!");
+
+ // For vector abs, it can be lowered to:
+ // abs x
+ // ==>
+ // y = -x
+ // smax(x, y)
+
+ SDLoc dl(Op);
+ SDValue X = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
+
+ // The SMAX patch (https://reviews.llvm.org/D47332) has not landed yet,
+ // so use the intrinsic here for now.
+ // TODO: use ISD::SMAX directly once the SMAX patch lands.
+ Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
+ if (VT == MVT::v2i64)
+ BifID = Intrinsic::ppc_altivec_vmaxsd;
+ else if (VT == MVT::v8i16)
+ BifID = Intrinsic::ppc_altivec_vmaxsh;
+ else if (VT == MVT::v16i8)
+ BifID = Intrinsic::ppc_altivec_vmaxsb;
+
+ return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -9544,10 +9657,10 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
- case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::ABS: return LowerABS(Op, DAG);
// For counter-based loop handling.
case ISD::INTRINSIC_W_CHAIN: return SDValue();
@@ -9624,6 +9737,9 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;
+ case ISD::BITCAST:
+ // Don't handle bitcast here.
+ return;
}
}
@@ -9787,17 +9903,14 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
return BB;
}
-MachineBasicBlock *
-PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
- MachineBasicBlock *BB,
- bool is8bit, // operation
- unsigned BinOpcode,
- unsigned CmpOpcode,
- unsigned CmpPred) const {
+MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
+ MachineInstr &MI, MachineBasicBlock *BB,
+ bool is8bit, // operation
+ unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
// If we support part-word atomic mnemonics, just use them
if (Subtarget.hasPartwordAtomics())
- return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode,
- CmpOpcode, CmpPred);
+ return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
+ CmpPred);
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -9821,7 +9934,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB =
- CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
+ CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, loopMBB);
if (CmpOpcode)
@@ -9832,22 +9945,25 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
- : &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC =
+ is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
- unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
unsigned ShiftReg =
- isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
- unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
- unsigned MaskReg = RegInfo.createVirtualRegister(RC);
- unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
- unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
- unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
+ unsigned Incr2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
unsigned Ptr1Reg;
- unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
+ unsigned TmpReg =
+ (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
// thisMBB:
// ...
@@ -9876,82 +9992,107 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
if (ptrA != ZeroReg) {
Ptr1Reg = RegInfo.createVirtualRegister(RC);
BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
- .addReg(ptrA).addReg(ptrB);
+ .addReg(ptrA)
+ .addReg(ptrB);
} else {
Ptr1Reg = ptrB;
}
- BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
- .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ // We need to use a 32-bit subregister here to avoid a register class
+ // mismatch in 64-bit mode.
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
+ .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
+ .addImm(3)
+ .addImm(27)
+ .addImm(is8bit ? 28 : 27);
if (!isLittleEndian)
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg)
+ .addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
- .addReg(Ptr1Reg).addImm(0).addImm(61);
+ .addReg(Ptr1Reg)
+ .addImm(0)
+ .addImm(61);
else
BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
- .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
- BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
- .addReg(incr).addReg(ShiftReg);
+ .addReg(Ptr1Reg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
if (is8bit)
BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
else {
BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
- BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
+ BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
+ .addReg(Mask3Reg)
+ .addImm(65535);
}
BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
- .addReg(Mask2Reg).addReg(ShiftReg);
+ .addReg(Mask2Reg)
+ .addReg(ShiftReg);
BB = loopMBB;
BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
- .addReg(ZeroReg).addReg(PtrReg);
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
if (BinOpcode)
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
- .addReg(Incr2Reg).addReg(TmpDestReg);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
- .addReg(TmpDestReg).addReg(MaskReg);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
- .addReg(TmpReg).addReg(MaskReg);
+ .addReg(Incr2Reg)
+ .addReg(TmpDestReg);
+ BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
+ .addReg(TmpDestReg)
+ .addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
if (CmpOpcode) {
// For unsigned comparisons, we can directly compare the shifted values.
// For signed comparisons we shift and sign extend.
- unsigned SReg = RegInfo.createVirtualRegister(RC);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg)
- .addReg(TmpDestReg).addReg(MaskReg);
+ unsigned SReg = RegInfo.createVirtualRegister(GPRC);
+ BuildMI(BB, dl, TII->get(PPC::AND), SReg)
+ .addReg(TmpDestReg)
+ .addReg(MaskReg);
unsigned ValueReg = SReg;
unsigned CmpReg = Incr2Reg;
if (CmpOpcode == PPC::CMPW) {
- ValueReg = RegInfo.createVirtualRegister(RC);
+ ValueReg = RegInfo.createVirtualRegister(GPRC);
BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
- .addReg(SReg).addReg(ShiftReg);
- unsigned ValueSReg = RegInfo.createVirtualRegister(RC);
+ .addReg(SReg)
+ .addReg(ShiftReg);
+ unsigned ValueSReg = RegInfo.createVirtualRegister(GPRC);
BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
- .addReg(ValueReg);
+ .addReg(ValueReg);
ValueReg = ValueSReg;
CmpReg = incr;
}
BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
- .addReg(CmpReg).addReg(ValueReg);
+ .addReg(CmpReg)
+ .addReg(ValueReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
+ .addImm(CmpPred)
+ .addReg(PPC::CR0)
+ .addMBB(exitMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(exitMBB);
BB = loop2MBB;
}
- BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
- .addReg(Tmp3Reg).addReg(Tmp2Reg);
+ BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
BuildMI(BB, dl, TII->get(PPC::STWCX))
- .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
+ .addReg(Tmp4Reg)
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
- BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
- .addReg(ShiftReg);
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
+ .addReg(TmpDestReg)
+ .addReg(ShiftReg);
return BB;
}
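// As a rough C++ analogue (a sketch, not the emitted code), the partword
// loop above implements a byte-wide atomic read-modify-write with word-wide
// lwarx/stwcx. roughly as follows; the helper name is hypothetical, the
// layout assumes little-endian (big-endian XORs the shift as above), and the
// compare-exchange stands in for the lwarx/stwcx. reservation pair.
#include <cstdint>
static uint8_t atomicByteAddSketch(uint8_t *Addr, uint8_t Incr) {
  auto *Word = reinterpret_cast<uint32_t *>(
      reinterpret_cast<uintptr_t>(Addr) & ~uintptr_t(3));         // PtrReg
  unsigned Shift = (reinterpret_cast<uintptr_t>(Addr) & 3) * 8;   // Shift1Reg
  uint32_t Mask = uint32_t(0xFF) << Shift;                        // MaskReg
  uint32_t Incr2 = uint32_t(Incr) << Shift;                       // Incr2Reg
  uint32_t Old, New;
  do {
    Old = __atomic_load_n(Word, __ATOMIC_RELAXED);                // lwarx
    New = (Old & ~Mask) | ((Old + Incr2) & Mask);        // ANDC + AND + OR
  } while (!__atomic_compare_exchange_n(Word, &Old, New, /*weak=*/true,
                                        __ATOMIC_RELAXED, __ATOMIC_RELAXED));
  return uint8_t(Old >> Shift);                                   // final SRW
}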
@@ -9968,10 +10109,6 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = ++MBB->getIterator();
- // Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
-
unsigned DstReg = MI.getOperand(0).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
@@ -10034,10 +10171,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
- .addReg(PPC::X2)
- .addImm(TOCOffset)
- .addReg(BufReg);
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ .addReg(PPC::X2)
+ .addImm(TOCOffset)
+ .addReg(BufReg)
+ .cloneMemRefs(MI);
}
// Naked functions never have a base pointer, and so we use r1. For all
@@ -10052,8 +10189,8 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
.addReg(BaseReg)
.addImm(BPOffset)
- .addReg(BufReg);
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ .addReg(BufReg)
+ .cloneMemRefs(MI);
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
@@ -10086,8 +10223,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
.addImm(LabelOffset)
.addReg(BufReg);
}
-
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
mainMBB->addSuccessor(sinkMBB);
@@ -10111,10 +10247,6 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- // Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
-
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
@@ -10152,7 +10284,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
.addImm(0)
.addReg(BufReg);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
// Reload IP
if (PVT == MVT::i64) {
@@ -10164,7 +10296,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
.addImm(LabelOffset)
.addReg(BufReg);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
// Reload SP
if (PVT == MVT::i64) {
@@ -10176,7 +10308,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
.addImm(SPOffset)
.addReg(BufReg);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
// Reload BP
if (PVT == MVT::i64) {
@@ -10188,16 +10320,15 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
.addImm(BPOffset)
.addReg(BufReg);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
// Reload TOC
if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
- .addImm(TOCOffset)
- .addReg(BufReg);
-
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ .addImm(TOCOffset)
+ .addReg(BufReg)
+ .cloneMemRefs(MI);
}
// Jump
@@ -10221,7 +10352,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// way to mark the dependence as implicit there, and so the stackmap code
// will confuse it with a regular operand. Instead, add the dependence
// here.
- setUsesTOCBasePtr(*BB->getParent());
MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
}
@@ -10246,8 +10376,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineFunction *F = BB->getParent();
if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
- MI.getOpcode() == PPC::SELECT_CC_I8 ||
- MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) {
+ MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
+ MI.getOpcode() == PPC::SELECT_I8) {
SmallVector<MachineOperand, 2> Cond;
if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8)
@@ -10392,9 +10522,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
- .addReg(HiReg).addReg(ReadAgainReg);
+ .addReg(HiReg)
+ .addReg(ReadAgainReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(CmpReg)
+ .addMBB(readMBB);
BB->addSuccessor(readMBB);
BB->addSuccessor(sinkMBB);
@@ -10564,27 +10697,35 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// st[bhwd]cx. dest, ptr
// exitBB:
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
- .addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
- .addReg(oldval).addReg(dest);
+ .addReg(oldval)
+ .addReg(dest);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(midMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(midMBB);
BB = loop2MBB;
BuildMI(BB, dl, TII->get(StoreMnemonic))
- .addReg(newval).addReg(ptrA).addReg(ptrB);
+ .addReg(newval)
+ .addReg(ptrA)
+ .addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(loop1MBB);
BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
BB = midMBB;
BuildMI(BB, dl, TII->get(StoreMnemonic))
- .addReg(dest).addReg(ptrA).addReg(ptrB);
+ .addReg(dest)
+ .addReg(ptrA)
+ .addReg(ptrB);
BB->addSuccessor(exitMBB);
// exitMBB:
@@ -10619,24 +10760,26 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
- : &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC =
+ is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
- unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
unsigned ShiftReg =
- isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
- unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
- unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
- unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
- unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
- unsigned MaskReg = RegInfo.createVirtualRegister(RC);
- unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
- unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
- unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
+ unsigned NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
unsigned Ptr1Reg;
- unsigned TmpReg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpReg = RegInfo.createVirtualRegister(GPRC);
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
// thisMBB:
// ...
@@ -10673,74 +10816,107 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
if (ptrA != ZeroReg) {
Ptr1Reg = RegInfo.createVirtualRegister(RC);
BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
- .addReg(ptrA).addReg(ptrB);
+ .addReg(ptrA)
+ .addReg(ptrB);
} else {
Ptr1Reg = ptrB;
}
- BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
- .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+
+ // We need to use a 32-bit subregister here to avoid a register class
+ // mismatch in 64-bit mode.
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
+ .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
+ .addImm(3)
+ .addImm(27)
+ .addImm(is8bit ? 28 : 27);
if (!isLittleEndian)
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg)
+ .addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
- .addReg(Ptr1Reg).addImm(0).addImm(61);
+ .addReg(Ptr1Reg)
+ .addImm(0)
+ .addImm(61);
else
BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
- .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ .addReg(Ptr1Reg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(29);
BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
- .addReg(newval).addReg(ShiftReg);
+ .addReg(newval)
+ .addReg(ShiftReg);
BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
- .addReg(oldval).addReg(ShiftReg);
+ .addReg(oldval)
+ .addReg(ShiftReg);
if (is8bit)
BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
else {
BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
- .addReg(Mask3Reg).addImm(65535);
+ .addReg(Mask3Reg)
+ .addImm(65535);
}
BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
- .addReg(Mask2Reg).addReg(ShiftReg);
+ .addReg(Mask2Reg)
+ .addReg(ShiftReg);
BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
- .addReg(NewVal2Reg).addReg(MaskReg);
+ .addReg(NewVal2Reg)
+ .addReg(MaskReg);
BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
- .addReg(OldVal2Reg).addReg(MaskReg);
+ .addReg(OldVal2Reg)
+ .addReg(MaskReg);
BB = loop1MBB;
BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
- .addReg(ZeroReg).addReg(PtrReg);
- BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
- .addReg(TmpDestReg).addReg(MaskReg);
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
+ .addReg(TmpDestReg)
+ .addReg(MaskReg);
BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
- .addReg(TmpReg).addReg(OldVal3Reg);
+ .addReg(TmpReg)
+ .addReg(OldVal3Reg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(midMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(midMBB);
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
- .addReg(TmpDestReg).addReg(MaskReg);
- BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
- .addReg(Tmp2Reg).addReg(NewVal3Reg);
- BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
- .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
+ .addReg(TmpDestReg)
+ .addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
+ .addReg(Tmp2Reg)
+ .addReg(NewVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX))
+ .addReg(Tmp4Reg)
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(loop1MBB);
BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
BB = midMBB;
- BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
- .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX))
+ .addReg(TmpDestReg)
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
- BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
- .addReg(ShiftReg);
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
+ .addReg(TmpReg)
+ .addReg(ShiftReg);
} else if (MI.getOpcode() == PPC::FADDrtz) {
// This pseudo performs an FADD with rounding mode temporarily forced
// to round-to-zero. We emit this via custom inserter since the FPSCR
@@ -10777,9 +10953,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
- &PPC::GPRCRegClass :
- &PPC::G8RCRegClass);
+ unsigned Dest = RegInfo.createVirtualRegister(
+ Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
DebugLoc dl = MI.getDebugLoc();
BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
@@ -11231,9 +11406,8 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
} else {
// This is neither a signed nor an unsigned comparison, just make sure
// that the high bits are equal.
- KnownBits Op1Known, Op2Known;
- DAG.computeKnownBits(N->getOperand(0), Op1Known);
- DAG.computeKnownBits(N->getOperand(1), Op2Known);
+ KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
+ KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
// We don't really care about what is known about the first bit (if
// anything), so clear it in all masks prior to comparing them.
@@ -11750,6 +11924,37 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
ShiftCst);
}
+SDValue PPCTargetLowering::combineSetCC(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert(N->getOpcode() == ISD::SETCC &&
+ "Should be called with a SETCC node");
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ if (CC == ISD::SETNE || CC == ISD::SETEQ) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
+ if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
+ LHS.hasOneUse())
+ std::swap(LHS, RHS);
+
+ // x == 0-y --> x+y == 0
+ // x != 0-y --> x+y != 0
+ if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
+ RHS.hasOneUse()) {
+ SDLoc DL(N);
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ EVT OpVT = LHS.getValueType();
+ SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
+ return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
+ }
+ }
+
+ return DAGCombineTruncBoolExt(N, DCI);
+}
+
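// The rewrite relies on the modular identity  x == 0 - y  <=>  x + y == 0,
// which holds for any fixed-width integer type. A minimal self-check of the
// identity (a sketch; the helper name is hypothetical):
#include <cstdint>
static bool setccCombineHolds(uint64_t X, uint64_t Y) {
  bool Before = (X == uint64_t(0) - Y); // x == (0 - y)
  bool After = (X + Y == 0);            // (x + y) == 0
  return Before == After;               // true for all X and Y
}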
// Is this an extending load from an f32 to an f64?
static bool isFPExtLoad(SDValue Op) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
@@ -11869,7 +12074,8 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
}
// Not a build vector of (possibly fp_rounded) loads.
- if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD)
+ if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
+ N->getNumOperands() == 1)
return SDValue();
for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
@@ -12450,6 +12656,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDLoc dl(N);
switch (N->getOpcode()) {
default: break;
+ case ISD::ADD:
+ return combineADD(N, DCI);
case ISD::SHL:
return combineSHL(N, DCI);
case ISD::SRA:
@@ -12476,7 +12684,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ANY_EXTEND:
return DAGCombineExtBoolTrunc(N, DCI);
case ISD::TRUNCATE:
+ return combineTRUNCATE(N, DCI);
case ISD::SETCC:
+ if (SDValue CSCC = combineSetCC(N, DCI))
+ return CSCC;
+ LLVM_FALLTHROUGH;
case ISD::SELECT_CC:
return DAGCombineTruncBoolExt(N, DCI);
case ISD::SINT_TO_FP:
@@ -12499,9 +12711,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
(Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
(Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
- // STBRX can only handle simple types.
+ // STBRX can only handle simple types, and it makes no sense to store fewer
+ // than two bytes in byte-reversed order.
EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
- if (mVT.isExtended())
+ if (mVT.isExtended() || mVT.getSizeInBits() < 16)
break;
SDValue BSwapOp = N->getOperand(1).getOperand(0);
@@ -12877,6 +13090,39 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
}
+
+ // Combine vmaxsw/h/b(a, a's negation) into abs(a) to expose the
+ // vabsduw/h/b opportunity to downstream combines.
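+ // (vmaxs picks per lane the larger of a and -a, which is abs(a); likewise
+ // vmaxs(x-y, y-x) yields abs(x-y).)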
+ if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
+ (IID == Intrinsic::ppc_altivec_vmaxsw ||
+ IID == Intrinsic::ppc_altivec_vmaxsh ||
+ IID == Intrinsic::ppc_altivec_vmaxsb)) {
+ SDValue V1 = N->getOperand(1);
+ SDValue V2 = N->getOperand(2);
+ if ((V1.getSimpleValueType() == MVT::v4i32 ||
+ V1.getSimpleValueType() == MVT::v8i16 ||
+ V1.getSimpleValueType() == MVT::v16i8) &&
+ V1.getSimpleValueType() == V2.getSimpleValueType()) {
+ // (0-a, a)
+ if (V1.getOpcode() == ISD::SUB &&
+ ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
+ V1.getOperand(1) == V2) {
+ return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
+ }
+ // (a, 0-a)
+ if (V2.getOpcode() == ISD::SUB &&
+ ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
+ V2.getOperand(1) == V1) {
+ return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
+ }
+ // (x-y, y-x)
+ if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
+ V1.getOperand(0) == V2.getOperand(1) &&
+ V1.getOperand(1) == V2.getOperand(0)) {
+ return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
+ }
+ }
+ }
}
break;
@@ -13109,6 +13355,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::BUILD_VECTOR:
return DAGCombineBuildVector(N, DCI);
+ case ISD::ABS:
+ return combineABS(N, DCI);
+ case ISD::VSELECT:
+ return combineVSelect(N, DCI);
}
return SDValue();
@@ -13251,7 +13501,8 @@ PPCTargetLowering::getConstraintType(StringRef Constraint) const {
} else if (Constraint == "wc") { // individual CR bits.
return C_RegisterClass;
} else if (Constraint == "wa" || Constraint == "wd" ||
- Constraint == "wf" || Constraint == "ws") {
+ Constraint == "wf" || Constraint == "ws" ||
+ Constraint == "wi") {
return C_RegisterClass; // VSX registers.
}
return TargetLowering::getConstraintType(Constraint);
@@ -13281,6 +13532,8 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
return CW_Register;
else if (StringRef(constraint) == "ws" && type->isDoubleTy())
return CW_Register;
+ else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
+ return CW_Register; // just holds 64-bit integer data.
switch (*constraint) {
default:
@@ -13363,7 +13616,8 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// An individual CR bit.
return std::make_pair(0U, &PPC::CRBITRCRegClass);
} else if ((Constraint == "wa" || Constraint == "wd" ||
- Constraint == "wf") && Subtarget.hasVSX()) {
+ Constraint == "wf" || Constraint == "wi") &&
+ Subtarget.hasVSX()) {
return std::make_pair(0U, &PPC::VSRCRegClass);
} else if (Constraint == "ws" && Subtarget.hasVSX()) {
if (VT == MVT::f32 && Subtarget.hasP8Vector())
@@ -13598,6 +13852,35 @@ unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
report_fatal_error("Invalid register name global variable");
}
+bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
+ // The 32-bit SVR4 ABI accesses everything as got-indirect.
+ if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
+ return true;
+
+ CodeModel::Model CModel = getTargetMachine().getCodeModel();
+ // Under the small or large code model, module locals are accessed
+ // indirectly by loading their address from the .toc/.got. The difference
+ // is that the large code model uses ADDIStocHA + LDtocL, while the
+ // small code model simply uses LDtoc.
+ if (CModel == CodeModel::Small || CModel == CodeModel::Large)
+ return true;
+
+ // JumpTable and BlockAddress are accessed as got-indirect.
+ if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
+ return true;
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
+ const GlobalValue *GV = G->getGlobal();
+ unsigned char GVFlags = Subtarget.classifyGlobalReference(GV);
+ // The NLP flag indicates that a global access has to use an
+ // extra indirection.
+ if (GVFlags & PPCII::MO_NLP_FLAG)
+ return true;
+ }
+
+ return false;
+}
+
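// For reference, the access sequences this refers to look like the following
// PPC64 ELF assembly (a sketch for a global 'x'):
//   small code model:  ld    3, x@toc(2)          # LDtoc
//   large code model:  addis 3, 2, x@toc@ha       # ADDIStocHA
//                      ld    3, x@toc@l(3)        # LDtocL
// Either way the address of 'x' is loaded from the TOC rather than computed
// directly, which is what "got-indirect" means here.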
bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The PowerPC target isn't yet aware of offsets.
@@ -14116,7 +14399,30 @@ SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
return Value;
- return SDValue();
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!Subtarget.isISA3_0() ||
+ N0.getOpcode() != ISD::SIGN_EXTEND ||
+ N0.getOperand(0).getValueType() != MVT::i32 ||
+ CN1 == nullptr || N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ // We can't save an operation here if the value is already extended, and
+ // the existing shift is easier to combine.
+ SDValue ExtsSrc = N0.getOperand(0);
+ if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
+ ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
+ return SDValue();
+
+ SDLoc DL(N0);
+ SDValue ShiftBy = SDValue(CN1, 0);
+ // We want the shift amount to be an i32 on the extswsli, but the shift
+ // amount could be an i64.
+ if (ShiftBy.getValueType() == MVT::i64)
+ ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
+
+ return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
+ ShiftBy);
}
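// The fold above targets source shapes like the following, which on ISA 3.0
// become a single extswsli instead of extsw followed by a shift (a sketch;
// the helper name is hypothetical):
static int64_t extswsliShapeSketch(int32_t X) {
  return int64_t(X) << 3; // sign-extend word, then shift left by an immediate
}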
SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
@@ -14133,6 +14439,152 @@ SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
return SDValue();
}
+// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
+// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
+// When C is zero, the expression (addi Z, -C) simplifies to Z.
+// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
+static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
+ if (!Subtarget.isPPC64())
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ auto isZextOfCompareWithConstant = [](SDValue Op) {
+ if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
+ Op.getValueType() != MVT::i64)
+ return false;
+
+ SDValue Cmp = Op.getOperand(0);
+ if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
+ Cmp.getOperand(0).getValueType() != MVT::i64)
+ return false;
+
+ if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
+ int64_t NegConstant = 0 - Constant->getSExtValue();
+ // Due to the limitations of the addi instruction,
+ // -C is required to be [-32768, 32767].
+ return isInt<16>(NegConstant);
+ }
+
+ return false;
+ };
+
+ bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
+ bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
+
+ // If there is a pattern, canonicalize a zext operand to the RHS.
+ if (LHSHasPattern && !RHSHasPattern)
+ std::swap(LHS, RHS);
+ else if (!LHSHasPattern && !RHSHasPattern)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
+ SDValue Cmp = RHS.getOperand(0);
+ SDValue Z = Cmp.getOperand(0);
+ auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
+
+ assert(Constant && "Constant should not be a null pointer.");
+ int64_t NegConstant = 0 - Constant->getSExtValue();
+
+ switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
+ default: break;
+ case ISD::SETNE: {
+ // when C == 0
+ // --> addze X, (addic Z, -1).carry
+ // /
+ // add X, (zext(setne Z, C))--
+ // \ when -32768 <= -C <= 32767 && C != 0
+ // --> addze X, (addic (addi Z, -C), -1).carry
+ SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
+ DAG.getConstant(NegConstant, DL, MVT::i64));
+ SDValue AddOrZ = NegConstant != 0 ? Add : Z;
+ SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
+ AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
+ return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
+ SDValue(Addc.getNode(), 1));
+ }
+ case ISD::SETEQ: {
+ // when C == 0
+ // --> addze X, (subfic Z, 0).carry
+ // /
+ // add X, (zext(sete Z, C))--
+ // \ when -32768 <= -C <= 32767 && C != 0
+ // --> addze X, (subfic (addi Z, -C), 0).carry
+ SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
+ DAG.getConstant(NegConstant, DL, MVT::i64));
+ SDValue AddOrZ = NegConstant != 0 ? Add : Z;
+ SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
+ DAG.getConstant(0, DL, MVT::i64), AddOrZ);
+ return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
+ SDValue(Subc.getNode(), 1));
+ }
+ }
+
+ return SDValue();
+}
+
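// The carry trick above, sketched in scalar form (the helper name is
// hypothetical): addic AddOrZ, -1 produces a carry exactly when AddOrZ != 0,
// and subfic 0 - AddOrZ produces a carry exactly when AddOrZ == 0, so addze
// folds zext(setcc Z, C) into a single carry-in add:
#include <cstdint>
static uint64_t addZextSetneSketch(uint64_t X, uint64_t Z, int64_t C) {
  uint64_t AddOrZ = Z - uint64_t(C);      // (addi Z, -C); just Z when C == 0
  uint64_t Carry = (AddOrZ != 0) ? 1 : 0; // carry out of (addic AddOrZ, -1)
  return X + Carry;                       // addze X; equals X + (Z != C)
}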
+SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
+ if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
+ return Value;
+
+ return SDValue();
+}
+
+// Detect TRUNCATE operations on bitcasts of float128 values.
+// What we are looking for here is the situation where we extract a subset
+// of bits from a 128-bit float.
+// This can be of two forms:
+// 1) BITCAST of f128 feeding TRUNCATE
+// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
+// The reason this is required is because we do not have a legal i128 type
+// and so we want to prevent having to store the f128 and then reload part
+// of it.
+SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ // If we are using CRBits then try that first.
+ if (Subtarget.useCRBits()) {
+ // Check if CRBits did anything and return that if it did.
+ if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
+ return CRTruncValue;
+ }
+
+ SDLoc dl(N);
+ SDValue Op0 = N->getOperand(0);
+
+ // Looking for a truncate of i128 to i64.
+ if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
+
+ // SRL feeding TRUNCATE.
+ if (Op0.getOpcode() == ISD::SRL) {
+ ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+ // The right shift has to be by 64 bits.
+ if (!ConstNode || ConstNode->getZExtValue() != 64)
+ return SDValue();
+
+ // Switch the element number to extract.
+ EltToExtract = EltToExtract ? 0 : 1;
+ // Update Op0 past the SRL.
+ Op0 = Op0.getOperand(0);
+ }
+
+ // BITCAST feeding a TRUNCATE possibly via SRL.
+ if (Op0.getOpcode() == ISD::BITCAST &&
+ Op0.getValueType() == MVT::i128 &&
+ Op0.getOperand(0).getValueType() == MVT::f128) {
+ SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
+ return DCI.DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
+ DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
+ }
+ return SDValue();
+}
+
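// The combine keeps patterns like the following out of memory; __float128
// and __int128 are GCC/Clang extensions, and the helper name is hypothetical
// (a sketch):
#include <cstdint>
#include <cstring>
static uint64_t f128HighBitsSketch(__float128 V) {
  unsigned __int128 Bits;
  std::memcpy(&Bits, &V, sizeof(Bits)); // the BITCAST of f128 to i128
  return uint64_t(Bits >> 64);          // SRL by 64 feeding the TRUNCATE
}
// After the combine both halves come from one vector-element extract of the
// v2i64 bitcast (for the high half: element 0 on big-endian, element 1 on
// little-endian, after the SRL flips EltToExtract).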
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// Only duplicate to increase tail-calls for the 64bit SysV ABIs.
if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
@@ -14168,6 +14620,15 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
}
+bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
+ if (!Subtarget.hasVSX())
+ return false;
+ if (Subtarget.hasP9Vector() && VT == MVT::f128)
+ return true;
+ return VT == MVT::f32 || VT == MVT::f64 ||
+ VT == MVT::v4f32 || VT == MVT::v2f64;
+}
+
bool PPCTargetLowering::
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
const Value *Mask = AndI.getOperand(1);
@@ -14184,3 +14645,109 @@ isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
// For non-constant masks, we can always use the record-form and.
return true;
}
+
+// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
+// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
+// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
+// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
+// Transform (abs (sub a, b)) to (vabsd a b 1) if a and b are of type v4i32
+SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
+ assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
+ assert(Subtarget.hasP9Altivec() &&
+ "Only combine this when P9 altivec supported!");
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ if (N->getOperand(0).getOpcode() == ISD::SUB) {
+ // Even for signed integers, the difference is known to be non-negative
+ // (as a signed integer) because the inputs are zero-extended.
+ unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
+ unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
+ if ((SubOpcd0 == ISD::ZERO_EXTEND ||
+ SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+ (SubOpcd1 == ISD::ZERO_EXTEND ||
+ SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
+ return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
+ N->getOperand(0)->getOperand(0),
+ N->getOperand(0)->getOperand(1),
+ DAG.getTargetConstant(0, dl, MVT::i32));
+ }
+
+ // For type v4i32, it can be optimized with xvnegsp + vabsduw
+ if (N->getOperand(0).getValueType() == MVT::v4i32 &&
+ N->getOperand(0).hasOneUse()) {
+ return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
+ N->getOperand(0)->getOperand(0),
+ N->getOperand(0)->getOperand(1),
+ DAG.getTargetConstant(1, dl, MVT::i32));
+ }
+ }
+
+ return SDValue();
+}
+
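// Why the zext cases above are safe, shown for one 8-bit lane widened by
// zero-extension (a sketch; the helper name is hypothetical): the difference
// always fits the wider type, so abs(sub(zext a, zext b)) is exactly the
// unsigned absolute difference vabsdub computes per lane.
#include <cstdlib>
static unsigned absdLaneSketch(unsigned char A, unsigned char B) {
  int Diff = int(A) - int(B);      // sub(zext a, zext b): always in [-255, 255]
  return unsigned(std::abs(Diff)); // == max(A, B) - min(A, B)
}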
+// For types v4i32/v8i16/v16i8, transform
+// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
+// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
+// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
+// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
+SDValue PPCTargetLowering::combineVSelect(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
+ assert(Subtarget.hasP9Altivec() &&
+ "Only combine this when P9 altivec supported!");
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ SDValue Cond = N->getOperand(0);
+ SDValue TrueOpnd = N->getOperand(1);
+ SDValue FalseOpnd = N->getOperand(2);
+ EVT VT = N->getOperand(1).getValueType();
+
+ if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
+ FalseOpnd.getOpcode() != ISD::SUB)
+ return SDValue();
+
+ // ABSD only available for type v4i32/v8i16/v16i8
+ if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
+ return SDValue();
+
+ // Only combine when at least one operand has a single use, so that we save
+ // at least one dependent computation.
+ if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
+ return SDValue();
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ // We can only handle unsigned comparisons here.
+ switch (CC) {
+ default:
+ return SDValue();
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ break;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ std::swap(TrueOpnd, FalseOpnd);
+ break;
+ }
+
+ SDValue CmpOpnd1 = Cond.getOperand(0);
+ SDValue CmpOpnd2 = Cond.getOperand(1);
+
+ // SETCC CmpOpnd1 CmpOpnd2 cond
+ // TrueOpnd = CmpOpnd1 - CmpOpnd2
+ // FalseOpnd = CmpOpnd2 - CmpOpnd1
+ if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
+ TrueOpnd.getOperand(1) == CmpOpnd2 &&
+ FalseOpnd.getOperand(0) == CmpOpnd2 &&
+ FalseOpnd.getOperand(1) == CmpOpnd1) {
+ return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
+ CmpOpnd1, CmpOpnd2,
+ DAG.getTargetConstant(0, dl, MVT::i32));
+ }
+
+ return SDValue();
+}
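// The matched pattern is the classic branch-free absolute-difference idiom,
// applied per lane (a sketch; the helper name is hypothetical):
static unsigned absDiffIdiomSketch(unsigned A, unsigned B) {
  return A > B ? A - B : B - A; // vselect(setugt a b, sub(a,b), sub(b,a))
}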
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index f174943a8004..30acd60eba6f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -149,6 +149,10 @@ namespace llvm {
/// For vector types, only the last n bits are used. See vsld.
SRL, SRA, SHL,
+ /// EXTSWSLI = The PPC extswsli instruction, which sign-extends a word and
+ /// then shifts it left by an immediate.
+ EXTSWSLI,
+
/// The combination of sra[wd]i and addze used to implemented signed
/// integer division by a power of 2. The first operand is the dividend,
/// and the second is the constant shift amount (representing the
@@ -369,6 +373,21 @@ namespace llvm {
/// An SDNode for swaps that are not associated with any loads/stores
/// and thereby have no chain.
SWAP_NO_CHAIN,
+
+ /// An SDNode for Power9 vector absolute value difference.
+ /// operand #0 vector
+ /// operand #1 vector
+ /// operand #2 constant i32 0 or 1, indicating whether the most significant
+ /// bit needs to be patched for signed i32
+ ///
+ /// The Power9 VABSD* instructions are designed for unsigned integer
+ /// vectors (byte/halfword/word); to use them for signed integer vectors we
+ /// have to flip the sign bits first. Flipping the sign bits of a
+ /// byte/halfword vector would be inefficient, but for a word vector we can
+ /// leverage XVNEGSP to do it efficiently, e.g.:
+ /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000)
+ /// => VABSDUW((XVNEGSP a), (XVNEGSP b))
+ VABSD,
/// QVFPERM = This corresponds to the QPX qvfperm instruction.
QVFPERM,
@@ -557,6 +576,11 @@ namespace llvm {
/// DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
+ bool isSelectSupported(SelectSupportKind Kind) const override {
+ // PowerPC does not support scalar condition selects on vectors.
+ return (Kind != SelectSupportKind::ScalarCondVectorVal);
+ }
+
/// getPreferredVectorAction - The code we generate when vector types are
/// legalized by promoting the integer element type is often much worse
/// than code we generate if we widen the type for applicable vector types.
@@ -565,7 +589,7 @@ namespace llvm {
/// of v4i8's and shuffle them. This will turn into a mess of 8 extending
/// loads, moves back into VSR's (or memory ops if we don't have moves) and
/// then the VPERM for the shuffle. All in all a very slow sequence.
- TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
+ TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override {
if (VT.getScalarSizeInBits() % 8 == 0)
return TypeWidenVector;
@@ -785,6 +809,9 @@ namespace llvm {
return true;
}
+ // Returns true if the address of the global is stored in a TOC entry.
+ bool isAccessedAsGotIndirect(SDValue N) const;
+
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info,
@@ -923,6 +950,9 @@ namespace llvm {
SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const;
+ SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
+ const SDLoc &dl) const;
+
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
@@ -988,6 +1018,7 @@ namespace llvm {
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
@@ -1088,6 +1119,11 @@ namespace llvm {
SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
/// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
/// SETCC with integer subtraction when (1) there is a legal way of doing it
@@ -1122,6 +1158,7 @@ namespace llvm {
// tail call. This will cause the optimizers to attempt to move, or
// duplicate return instructions to help enable tail call optimizations.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+ bool hasBitPreservingFPLogic(EVT VT) const override;
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
}; // end class PPCTargetLowering
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index cdd57c6a1118..2ce6ad3293eb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -94,7 +94,7 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
}
let Defs = [LR8] in
- def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>,
+ def MovePCtoLR8 : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR8", []>,
PPC970_Unit_BRU;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
@@ -199,47 +199,45 @@ def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
// clean this up in PPCMIPeephole with calls to
// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them
// in the first place.
-let usesCustomInserter = 1 in {
- let Defs = [CR0] in {
- def ATOMIC_LOAD_ADD_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_ADD_I64",
- [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_SUB_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_SUB_I64",
- [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_OR_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_OR_I64",
- [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_XOR_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_XOR_I64",
- [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_AND_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_AND_i64",
- [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_NAND_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64",
- [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_MIN_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MIN_I64",
- [(set i64:$dst, (atomic_load_min_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_MAX_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MAX_I64",
- [(set i64:$dst, (atomic_load_max_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_UMIN_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMIN_I64",
- [(set i64:$dst, (atomic_load_umin_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_UMAX_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMAX_I64",
- [(set i64:$dst, (atomic_load_umax_64 xoaddr:$ptr, i64:$incr))]>;
-
- def ATOMIC_CMP_SWAP_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64",
- [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
-
- def ATOMIC_SWAP_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64",
- [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
- }
+let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_ADD_I64",
+ [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_SUB_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_SUB_I64",
+ [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_OR_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_OR_I64",
+ [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_XOR_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_XOR_I64",
+ [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_AND_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_AND_i64",
+ [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_NAND_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64",
+ [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_MIN_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MIN_I64",
+ [(set i64:$dst, (atomic_load_min_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_MAX_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MAX_I64",
+ [(set i64:$dst, (atomic_load_max_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_UMIN_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMIN_I64",
+ [(set i64:$dst, (atomic_load_umin_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_UMAX_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMAX_I64",
+ [(set i64:$dst, (atomic_load_umax_64 xoaddr:$ptr, i64:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64",
+ [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
+
+ def ATOMIC_SWAP_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64",
+ [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
}
// Instructions to support atomic operations
@@ -269,18 +267,18 @@ def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC),
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNdi8 :Pseudo< (outs),
+def TCRETURNdi8 :PPCEmitTimePseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
"#TC_RETURNd8 $dst $offset",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNai8 :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
+def TCRETURNai8 :PPCEmitTimePseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
"#TC_RETURNa8 $func $offset",
[(PPCtc_return (i64 imm:$func), imm:$offset)]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
+def TCRETURNri8 : PPCEmitTimePseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
"#TC_RETURNr8 $dst $offset",
[]>;
@@ -347,14 +345,19 @@ def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins),
} // hasExtraSrcRegAllocReq = 1
} // hasSideEffects = 0
-let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+// While longjmp is a control-flow barrier (fallthrough isn't allowed), setjmp
+// is not.
+let hasSideEffects = 1 in {
let Defs = [CTR8] in
- def EH_SjLj_SetJmp64 : Pseudo<(outs gprc:$dst), (ins memr:$buf),
+ def EH_SjLj_SetJmp64 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf),
"#EH_SJLJ_SETJMP64",
[(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
Requires<[In64BitMode]>;
+}
+
+let hasSideEffects = 1, isBarrier = 1 in {
let isTerminator = 1 in
- def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf),
+ def EH_SjLj_LongJmp64 : PPCCustomInserterPseudo<(outs), (ins memr:$buf),
"#EH_SJLJ_LONGJMP64",
[(PPCeh_sjlj_longjmp addr:$buf)]>,
Requires<[In64BitMode]>;
@@ -396,10 +399,10 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins),
// the POWER3.
let Defs = [X1], Uses = [X1] in
-def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8",
+def DYNALLOC8 : PPCEmitTimePseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8",
[(set i64:$result,
(PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
-def DYNAREAOFFSET8 : Pseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8",
+def DYNAREAOFFSET8 : PPCEmitTimePseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8",
[(set i64:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
let Defs = [LR8] in {
@@ -717,9 +720,10 @@ defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
"sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
[(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
-defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
+defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH),
"extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
- []>, isPPC64;
+ [(set i64:$rA, (PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
+ isPPC64, Requires<[IsISA3_0]>;
// For fast-isel:
let isCodeGenOnly = 1, Defs = [CARRY] in
@@ -773,8 +777,12 @@ def MADDHDU : VAForm_1a<49, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC)
"maddhdu $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
def MADDLD : VAForm_1a<51, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
"maddld $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
-def SETB : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
- "setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
+def SETB : XForm_44<31, 128, (outs gprc:$RT), (ins crrc:$BFA),
+ "setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ def SETB8 : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
+ "setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
+}
def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins i32imm:$L),
"darn $RT, $L", IIC_LdStLD>, isPPC64;
def ADDPCIS : DXForm<19, 2, (outs g8rc:$RT), (ins i32imm:$D),
@@ -1018,19 +1026,19 @@ def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
// The following four definitions are selected for small code model only.
// Otherwise, we need to create two instructions to form a 32-bit offset,
// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
-def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+def LDtoc: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtoc",
[(set i64:$rD,
(PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
-def LDtocJTI: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+def LDtocJTI: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtocJTI",
[(set i64:$rD,
(PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64;
-def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+def LDtocCPT: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtocCPT",
[(set i64:$rD,
(PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
-def LDtocBA: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+def LDtocBA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtocCPT",
[(set i64:$rD,
(PPCtoc_entry tblockaddress:$disp, i64:$reg))]>, isPPC64;
@@ -1071,40 +1079,40 @@ def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins memrr:$src),
// Support for medium and large code model.
let hasSideEffects = 0 in {
let isReMaterializable = 1 in {
-def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+def ADDIStocHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDIStocHA", []>, isPPC64;
-def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+def ADDItocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDItocL", []>, isPPC64;
}
let mayLoad = 1 in
-def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
+def LDtocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
"#LDtocL", []>, isPPC64;
}
// Support for thread-local storage.
-def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDISgotTprelHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISgotTprelHA",
[(set i64:$rD,
(PPCaddisGotTprelHA i64:$reg,
tglobaltlsaddr:$disp))]>,
isPPC64;
-def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
+def LDgotTprelL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
"#LDgotTprelL",
[(set i64:$rD,
(PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
isPPC64;
-let isPseudo = 1, Defs = [CR7], Itinerary = IIC_LdStSync in
-def CFENCE8 : Pseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>;
+let Defs = [CR7], Itinerary = IIC_LdStSync in
+def CFENCE8 : PPCPostRAExpPseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>;
def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
(ADD8TLS $in, tglobaltlsaddr:$g)>;
-def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDIStlsgdHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsgdHA",
[(set i64:$rD,
(PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDItlsgdL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDItlsgdL",
[(set i64:$rD,
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
@@ -1115,7 +1123,7 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
// correct because the branch select pass is relying on it.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Size = 8,
Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+def GETtlsADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsADDR",
[(set i64:$rD,
(PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
@@ -1125,7 +1133,7 @@ def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
in
-def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD),
+def ADDItlsgdLADDR : PPCEmitTimePseudo<(outs g8rc:$rD),
(ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
"#ADDItlsgdLADDR",
[(set i64:$rD,
@@ -1133,12 +1141,12 @@ def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD),
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>,
isPPC64;
-def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDIStlsldHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsldHA",
[(set i64:$rD,
(PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDItlsldL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDItlsldL",
[(set i64:$rD,
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
@@ -1147,7 +1155,7 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
// explicitly defined when this op is created, so not mentioned here.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+def GETtlsldADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsldADDR",
[(set i64:$rD,
(PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
@@ -1157,7 +1165,7 @@ def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
in
-def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD),
+def ADDItlsldLADDR : PPCEmitTimePseudo<(outs g8rc:$rD),
(ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
"#ADDItlsldLADDR",
[(set i64:$rD,
@@ -1165,13 +1173,13 @@ def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD),
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>,
isPPC64;
-def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDISdtprelHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISdtprelHA",
[(set i64:$rD,
(PPCaddisDtprelHA i64:$reg,
tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDIdtprelL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIdtprelL",
[(set i64:$rD,
(PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
@@ -1221,30 +1229,30 @@ def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "stbu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stbu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "sthu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "sthu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "stwu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stwu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STBUX8: XForm_8_memOp<31, 247, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrr:$dst),
- "stbux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stbux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STHUX8: XForm_8_memOp<31, 439, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrr:$dst),
- "sthux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "sthux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STWUX8: XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrr:$dst),
- "stwux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stwux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
@@ -1252,13 +1260,13 @@ def STWUX8: XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res),
def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrix:$dst),
- "stdu $rS, $dst", IIC_LdStSTDU, []>,
+ "stdu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
isPPC64;
def STDUX : XForm_8_memOp<31, 181, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrr:$dst),
- "stdux $rS, $dst", IIC_LdStSTDUX, []>,
+ "stdux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked, isPPC64;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 24969d7ef853..69b19e45c3e9 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1051,6 +1051,20 @@ def : Pat<(v4f32 (ftrunc v4f32:$vA)),
def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
(VRFIN $vA)>;
+// Vector selection
+def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v4i32 (vselect v4i32:$vA, v4i32:$vB, v4i32:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v2i64 (vselect v2i64:$vA, v2i64:$vB, v2i64:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v4f32 (vselect v4i32:$vA, v4f32:$vB, v4f32:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v2f64 (vselect v2i64:$vA, v2f64:$vB, v2f64:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+
} // end HasAltivec
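As an aside, the operand order in these patterns is easy to misread: ISD::VSELECT takes the mask first, while vsel takes it last and selects from its second source where a mask bit is set. A minimal standalone sketch of the bitwise semantics (illustrative C++ only, not part of this patch):

    #include <cassert>
    #include <cstdint>

    // vsel vD,vA,vB,vC: bit i of vD comes from vB where vC is 1, from vA where 0.
    static uint32_t ppc_vsel(uint32_t a, uint32_t b, uint32_t c) {
      return (c & b) | (~c & a);
    }

    // ISD::VSELECT(mask, t, f): take bits of t where mask is 1, else f.
    // Matching (vselect $vA, $vB, $vC) to (VSEL $vC, $vB, $vA) is exactly
    // this operand reversal.
    static uint32_t isd_vselect(uint32_t mask, uint32_t t, uint32_t f) {
      return ppc_vsel(/*a=*/f, /*b=*/t, /*c=*/mask);
    }

    int main() {
      assert(isd_vselect(0xFF00FF00u, 0x11111111u, 0x22222222u) == 0x11221122u);
      return 0;
    }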
def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index f5f4b46344cf..2fe765dd99e1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -2153,7 +2153,9 @@ class Z23Form_8<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
}
//===----------------------------------------------------------------------===//
-class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+// An emit-time pseudo carries no encoding information for the
+// [MC]CodeEmitter machinery.
+class PPCEmitTimePseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
: I<0, OOL, IOL, asmstr, NoItinerary> {
let isCodeGenOnly = 1;
let PPC64 = 0;
@@ -2162,6 +2164,21 @@ class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
let hasNoSchedulingInfo = 1;
}
+// Instructions that require custom insertion support (a.k.a. ISelPseudos).
+// Note that these do not have the isPseudo flag set.
+class PPCCustomInserterPseudo<dag OOL, dag IOL, string asmstr,
+ list<dag> pattern>
+ : PPCEmitTimePseudo<OOL, IOL, asmstr, pattern> {
+ let usesCustomInserter = 1;
+}
+
+// A PostRAPseudo is expanded in expandPostRAPseudo; the isPseudo flag in the
+// .td files is set only for PostRAPseudos.
+class PPCPostRAExpPseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : PPCEmitTimePseudo<OOL, IOL, asmstr, pattern> {
+ let isPseudo = 1;
+}
+
class PseudoXFormMemOp<dag OOL, dag IOL, string asmstr, list<dag> pattern>
- : Pseudo<OOL, IOL, asmstr, pattern>, XFormMemOp;
+ : PPCPostRAExpPseudo<OOL, IOL, asmstr, pattern>, XFormMemOp;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
index 6c4e2129087c..0efe797c765d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -20,8 +20,8 @@ def HTM_get_imm : SDNodeXForm<imm, [{
return getI32Imm (N->getZExtValue(), SDLoc(N));
}]>;
-let hasSideEffects = 1, usesCustomInserter = 1 in {
-def TCHECK_RET : Pseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
+let hasSideEffects = 1 in {
+def TCHECK_RET : PPCCustomInserterPseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 0930f7d3b8d7..d754ce2990d2 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -987,7 +987,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = PPC::XXLOR;
else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
PPC::VSSRCRegClass.contains(DestReg, SrcReg))
- Opc = PPC::XXLORf;
+ Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::QVFMR;
else if (PPC::QSRCRegClass.contains(DestReg, SrcReg))
@@ -1429,17 +1429,15 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
: (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
} else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MI.setDesc(get(PPC::BCLR));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg());
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
MI.setDesc(get(PPC::BCLRn));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg());
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
} else {
MI.setDesc(get(PPC::BCCLR));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
- .addReg(Pred[1].getReg());
+ .add(Pred[1]);
}
return true;
@@ -1454,7 +1452,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
MI.setDesc(get(PPC::BC));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg())
+ .add(Pred[1])
.addMBB(MBB);
} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
@@ -1462,7 +1460,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
MI.setDesc(get(PPC::BCn));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg())
+ .add(Pred[1])
.addMBB(MBB);
} else {
MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
@@ -1471,13 +1469,13 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
MI.setDesc(get(PPC::BCC));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
- .addReg(Pred[1].getReg())
+ .add(Pred[1])
.addMBB(MBB);
}
return true;
- } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 ||
- OpC == PPC::BCTRL || OpC == PPC::BCTRL8) {
+ } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
+ OpC == PPC::BCTRL8) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
@@ -1487,14 +1485,12 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
: (setLR ? PPC::BCCTRL : PPC::BCCTR)));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg());
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
return true;
} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
: (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg());
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
return true;
}
@@ -1502,7 +1498,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
: (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
- .addReg(Pred[1].getReg());
+ .add(Pred[1]);
return true;
}
@@ -1822,7 +1818,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
int NewOpC = -1;
int MIOpC = MI->getOpcode();
- if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8)
+ if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8 ||
+ MIOpC == PPC::ANDISo || MIOpC == PPC::ANDISo8)
NewOpC = MIOpC;
else {
NewOpC = PPC::getRecordFormOpcode(MIOpC);
@@ -1912,14 +1909,36 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// compare).
// Rotates are expensive instructions. If we're emitting a record-form
- // rotate that can just be an andi, we should just emit the andi.
- if ((MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) &&
- MI->getOperand(2).getImm() == 0) {
+ // rotate that can just be an andi/andis, we should just emit that.
+ if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
+ unsigned GPRRes = MI->getOperand(0).getReg();
+ int64_t SH = MI->getOperand(2).getImm();
int64_t MB = MI->getOperand(3).getImm();
int64_t ME = MI->getOperand(4).getImm();
- if (MB < ME && MB >= 16) {
- uint64_t Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
- NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIo : PPC::ANDIo8;
+ // We can only do this if both the start and end of the mask are in the
+ // same halfword.
+ bool MBInLoHWord = MB >= 16;
+ bool MEInLoHWord = ME >= 16;
+ uint64_t Mask = ~0LLU;
+
+ if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
+ Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
+ // The mask value needs to be shifted right by 16 if we're emitting andis.
+ Mask >>= MBInLoHWord ? 0 : 16;
+ NewOpC = MIOpC == PPC::RLWINM ?
+ (MBInLoHWord ? PPC::ANDIo : PPC::ANDISo) :
+ (MBInLoHWord ? PPC::ANDIo8 : PPC::ANDISo8);
+ } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
+ (ME - MB + 1 == SH) && (MB >= 16)) {
+ // If we are rotating by exactly the number of bits in the mask and the
+ // mask sits in the least significant bits of the register, that's just
+ // an andis. (as long as the GPR result has no uses).
+ Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
+ Mask >>= 16;
+ NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDISo : PPC::ANDISo8;
+ }
+ // If we've set the mask, we can transform.
+ if (Mask != ~0LLU) {
MI->RemoveOperand(4);
MI->RemoveOperand(3);
MI->getOperand(2).setImm(Mask);
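A standalone check of the mask arithmetic used above (the helper name is invented; this is not LLVM code): bits MB..ME in PowerPC's big-endian 32-bit numbering give exactly the value that andi./andis. would test.

    #include <cassert>
    #include <cstdint>

    // Mask with bits MB..ME set, where bit 0 is the most significant bit.
    static uint64_t rlwinmMask(int64_t MB, int64_t ME) {
      return ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
    }

    int main() {
      // rlwinm. rD, rS, 0, 16, 27 tests 0xFFF0 -> andi. rD, rS, 0xFFF0.
      assert(rlwinmMask(16, 27) == 0xFFF0);
      // rlwinm. rD, rS, 0, 0, 11 tests 0xFFF00000; the value is shifted
      // right by 16 when emitting andis. rD, rS, 0xFFF0.
      assert((rlwinmMask(0, 11) >> 16) == 0xFFF0);
      return 0;
    }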
@@ -2088,11 +2107,9 @@ bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const {
return true;
}
-#ifndef NDEBUG
static bool isAnImmediateOperand(const MachineOperand &MO) {
return MO.isCPI() || MO.isGlobal() || MO.isImm();
}
-#endif
bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
auto &MBB = *MI.getParent();
@@ -2231,6 +2248,35 @@ static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
return PPC::NoRegister;
}
+void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI,
+ unsigned OpNo,
+ int64_t Imm) const {
+ assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
+ // Replace the REG with the Immediate.
+ unsigned InUseReg = MI.getOperand(OpNo).getReg();
+ MI.getOperand(OpNo).ChangeToImmediate(Imm);
+
+ if (empty(MI.implicit_operands()))
+ return;
+
+ // Make sure the MI no longer has any implicit use of this REG.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, false, TRI);
+ if (UseOpIdx >= 0) {
+ MachineOperand &MO = MI.getOperand(UseOpIdx);
+ if (MO.isImplicit())
+ // The operands must always be in the following order:
+ // - explicit reg defs,
+ // - other explicit operands (reg uses, immediates, etc.),
+ // - implicit reg defs
+ // - implicit reg uses
+ // Therefore, removing the implicit operand won't change the explicit
+ // operands layout.
+ MI.RemoveOperand(UseOpIdx);
+ }
+}
+
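The operand-ordering invariant in the comment above can be modeled in a few lines (a toy sketch, not the MachineInstr API): because implicit operands always trail the explicit ones, erasing an implicit use leaves every explicit index intact.

    #include <cassert>
    #include <string>
    #include <vector>

    struct Operand { std::string Name; bool Implicit; };

    int main() {
      // Explicit def, explicit use, implicit use -- the documented layout.
      std::vector<Operand> Ops{{"rD", false}, {"rA", false}, {"carry", true}};
      Ops.erase(Ops.begin() + 2); // drop the trailing implicit use
      assert(Ops[0].Name == "rD" && Ops[1].Name == "rA"); // indices unchanged
      return 0;
    }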
// Replace an instruction with one that materializes a constant (and sets
// CR0 if the original instruction was a record-form instruction).
void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
@@ -2256,10 +2302,11 @@ void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
.addImm(LII.Imm);
}
-MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
- unsigned &ConstOp,
- bool &SeenIntermediateUse) const {
- ConstOp = ~0U;
+MachineInstr *PPCInstrInfo::getForwardingDefMI(
+ MachineInstr &MI,
+ unsigned &OpNoForForwarding,
+ bool &SeenIntermediateUse) const {
+ OpNoForForwarding = ~0U;
MachineInstr *DefMI = nullptr;
MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -2276,7 +2323,7 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
if (TargetRegisterInfo::isVirtualRegister(TrueReg)) {
DefMI = MRI->getVRegDef(TrueReg);
if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) {
- ConstOp = i;
+ OpNoForForwarding = i;
break;
}
}
@@ -2297,7 +2344,7 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
Opc == PPC::RLWINM || Opc == PPC::RLWINMo ||
Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
- if (!instrHasImmForm(MI, III) && !ConvertibleImmForm)
+ if (!instrHasImmForm(MI, III, true) && !ConvertibleImmForm)
return nullptr;
// Don't convert or %X, %Y, %Y since that's just a register move.
@@ -2319,15 +2366,22 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
if (PPC::G8RCRegClass.contains(Reg))
Reg = Reg - PPC::X0 + PPC::R0;
- // Is this register defined by a load-immediate in this block?
+ // Is this register defined by some form of add-immediate (including
+ // load-immediate) within this basic block?
for ( ; It != E; ++It) {
if (It->modifiesRegister(Reg, &getRegisterInfo())) {
- if (It->getOpcode() == PPC::LI || It->getOpcode() == PPC::LI8) {
- ConstOp = i;
+ switch (It->getOpcode()) {
+ default: break;
+ case PPC::LI:
+ case PPC::LI8:
+ case PPC::ADDItocL:
+ case PPC::ADDI:
+ case PPC::ADDI8:
+ OpNoForForwarding = i;
return &*It;
- } else
- break;
- } else if (It->readsRegister(Reg, &getRegisterInfo()))
+ }
+ break;
+ } else if (It->readsRegister(Reg, &getRegisterInfo()))
// If we see another use of this reg between the def and the MI,
// we want to flag it so the def isn't deleted.
SeenIntermediateUse = true;
@@ -2335,7 +2389,7 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
}
}
}
- return ConstOp == ~0U ? nullptr : DefMI;
+ return OpNoForForwarding == ~0U ? nullptr : DefMI;
}
const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const {
@@ -2371,35 +2425,48 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const {
}
// If this instruction has an immediate form and one of its operands is a
-// result of a load-immediate, convert it to the immediate form if the constant
-// is in range.
+// result of a load-immediate or an add-immediate, convert it to
+// the immediate form if the constant is in range.
bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef) const {
MachineFunction *MF = MI.getParent()->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
bool PostRA = !MRI->isSSA();
bool SeenIntermediateUse = true;
- unsigned ConstantOperand = ~0U;
- MachineInstr *DefMI = getConstantDefMI(MI, ConstantOperand,
- SeenIntermediateUse);
- if (!DefMI || !DefMI->getOperand(1).isImm())
+ unsigned ForwardingOperand = ~0U;
+ MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,
+ SeenIntermediateUse);
+ if (!DefMI)
+ return false;
+ assert(ForwardingOperand < MI.getNumOperands() &&
+ "The forwarding operand needs to be valid at this point");
+ bool KillFwdDefMI = !SeenIntermediateUse &&
+ MI.getOperand(ForwardingOperand).isKill();
+ if (KilledDef && KillFwdDefMI)
+ *KilledDef = DefMI;
+
+ ImmInstrInfo III;
+ bool HasImmForm = instrHasImmForm(MI, III, PostRA);
+ // If this is a reg+reg instruction that has a reg+imm form,
+ // and one of the operands is produced by an add-immediate,
+ // try to convert it.
+ if (HasImmForm && transformToImmFormFedByAdd(MI, III, ForwardingOperand,
+ *DefMI, KillFwdDefMI))
+ return true;
+
+ if ((DefMI->getOpcode() != PPC::LI && DefMI->getOpcode() != PPC::LI8) ||
+ !DefMI->getOperand(1).isImm())
return false;
- assert(ConstantOperand < MI.getNumOperands() &&
- "The constant operand needs to be valid at this point");
int64_t Immediate = DefMI->getOperand(1).getImm();
// Sign-extend to 64-bits.
int64_t SExtImm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ?
(Immediate | 0xFFFFFFFFFFFF0000) : Immediate;
- if (KilledDef && MI.getOperand(ConstantOperand).isKill() &&
- !SeenIntermediateUse)
- *KilledDef = DefMI;
-
- // If this is a reg+reg instruction that has a reg+imm form, convert it now.
- ImmInstrInfo III;
- if (instrHasImmForm(MI, III))
- return transformToImmForm(MI, III, ConstantOperand, SExtImm);
+ // If this is a reg+reg instruction that has a reg+imm form,
+ // and one of the operands is produced by LI, convert it now.
+ if (HasImmForm)
+ return transformToImmFormFedByLI(MI, III, ForwardingOperand, SExtImm);
bool ReplaceWithLI = false;
bool Is64BitLI = false;
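The sign-extension bit trick above is equivalent to a cast through int16_t; a quick standalone check (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    static int64_t trickSext(int64_t Immediate) {
      // Same expression as in convertToImmediateForm().
      return ((uint64_t)Immediate & ~0x7FFFuLL) != 0
                 ? (int64_t)(Immediate | 0xFFFFFFFFFFFF0000ULL)
                 : Immediate;
    }

    int main() {
      for (uint32_t v = 0; v <= 0xFFFF; ++v) // every 16-bit payload
        assert(trickSext((int64_t)v) == (int64_t)(int16_t)v);
      return 0;
    }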
@@ -2443,7 +2510,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
// Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
- CompareUseMI.getOperand(1).ChangeToImmediate(0);
+ replaceInstrOperandWithImm(CompareUseMI, 1, 0);
CompareUseMI.RemoveOperand(3);
CompareUseMI.RemoveOperand(2);
continue;
@@ -2602,18 +2669,23 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
return false;
}
+static bool isVFReg(unsigned Reg) {
+ return PPC::VFRCRegClass.contains(Reg);
+}
+
bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
- ImmInstrInfo &III) const {
+ ImmInstrInfo &III, bool PostRA) const {
unsigned Opc = MI.getOpcode();
// The vast majority of the instructions would need their operand 2 replaced
// with an immediate when switching to the reg+imm form. A marked exception
// are the update form loads/stores for which a constant operand 2 would need
// to turn into a displacement and move operand 1 to the operand 2 position.
III.ImmOpNo = 2;
- III.ConstantOpNo = 2;
+ III.OpNoForForwarding = 2;
III.ImmWidth = 16;
III.ImmMustBeMultipleOf = 1;
III.TruncateImmTo = 0;
+ III.IsSummingOperands = false;
switch (Opc) {
default: return false;
case PPC::ADD4:
@@ -2622,6 +2694,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 1;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
break;
case PPC::ADDC:
@@ -2630,6 +2703,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
break;
case PPC::ADDCo:
@@ -2637,6 +2711,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpcode = PPC::ADDICo;
break;
case PPC::SUBFC:
@@ -2809,8 +2884,9 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 1;
III.ZeroIsSpecialNew = 2;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpNo = 1;
- III.ConstantOpNo = 2;
+ III.OpNoForForwarding = 2;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
@@ -2866,8 +2942,9 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 2;
III.ZeroIsSpecialNew = 3;
III.IsCommutative = false;
+ III.IsSummingOperands = true;
III.ImmOpNo = 2;
- III.ConstantOpNo = 3;
+ III.OpNoForForwarding = 3;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
@@ -2898,21 +2975,30 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
}
break;
- // Power9 only.
+ // Power9 and up only. For some of these, the X-Form version has access to all
+ // 64 VSRs whereas the D-Form only has access to the VRs. We replace those
+ // with pseudo-ops pre-RA, and post-RA we check that the register loaded
+ // into or stored from is one of the VR registers.
case PPC::LXVX:
case PPC::LXSSPX:
case PPC::LXSDX:
case PPC::STXVX:
case PPC::STXSSPX:
case PPC::STXSDX:
+ case PPC::XFLOADf32:
+ case PPC::XFLOADf64:
+ case PPC::XFSTOREf32:
+ case PPC::XFSTOREf64:
if (!Subtarget.hasP9Vector())
return false;
III.SignedImm = true;
III.ZeroIsSpecialOrig = 1;
III.ZeroIsSpecialNew = 2;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpNo = 1;
- III.ConstantOpNo = 2;
+ III.OpNoForForwarding = 2;
+ III.ImmMustBeMultipleOf = 4;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LXVX:
@@ -2920,24 +3006,64 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ImmMustBeMultipleOf = 16;
break;
case PPC::LXSSPX:
- III.ImmOpcode = PPC::LXSSP;
- III.ImmMustBeMultipleOf = 4;
+ if (PostRA) {
+ if (isVFReg(MI.getOperand(0).getReg()))
+ III.ImmOpcode = PPC::LXSSP;
+ else {
+ III.ImmOpcode = PPC::LFS;
+ III.ImmMustBeMultipleOf = 1;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case PPC::XFLOADf32:
+ III.ImmOpcode = PPC::DFLOADf32;
break;
case PPC::LXSDX:
- III.ImmOpcode = PPC::LXSD;
- III.ImmMustBeMultipleOf = 4;
+ if (PostRA) {
+ if (isVFReg(MI.getOperand(0).getReg()))
+ III.ImmOpcode = PPC::LXSD;
+ else {
+ III.ImmOpcode = PPC::LFD;
+ III.ImmMustBeMultipleOf = 1;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case PPC::XFLOADf64:
+ III.ImmOpcode = PPC::DFLOADf64;
break;
case PPC::STXVX:
III.ImmOpcode = PPC::STXV;
III.ImmMustBeMultipleOf = 16;
break;
case PPC::STXSSPX:
- III.ImmOpcode = PPC::STXSSP;
- III.ImmMustBeMultipleOf = 4;
+ if (PostRA) {
+ if (isVFReg(MI.getOperand(0).getReg()))
+ III.ImmOpcode = PPC::STXSSP;
+ else {
+ III.ImmOpcode = PPC::STFS;
+ III.ImmMustBeMultipleOf = 1;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case PPC::XFSTOREf32:
+ III.ImmOpcode = PPC::DFSTOREf32;
break;
case PPC::STXSDX:
- III.ImmOpcode = PPC::STXSD;
- III.ImmMustBeMultipleOf = 4;
+ if (PostRA) {
+ if (isVFReg(MI.getOperand(0).getReg()))
+ III.ImmOpcode = PPC::STXSD;
+ else {
+ III.ImmOpcode = PPC::STFD;
+ III.ImmMustBeMultipleOf = 1;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case PPC::XFSTOREf64:
+ III.ImmOpcode = PPC::DFSTOREf64;
break;
}
break;
@@ -2984,13 +3110,264 @@ static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
}
}
-bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
- unsigned ConstantOpNo,
- int64_t Imm) const {
+// Check whether the 'MI' whose operand at index OpNoForForwarding would be
+// replaced meets the requirements described in the ImmInstrInfo.
+bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
+ const ImmInstrInfo &III,
+ unsigned OpNoForForwarding
+ ) const {
+ // Since checking for PPC::ZERO/PPC::ZERO8 does not work pre-RA,
+ // we can only do this check post-RA.
+ MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ if (MRI.isSSA())
+ return false;
+
+ // Cannot do the transform if MI isn't summing the operands.
+ if (!III.IsSummingOperands)
+ return false;
+
+ // The instruction we are trying to replace must have the ZeroIsSpecialOrig set.
+ if (!III.ZeroIsSpecialOrig)
+ return false;
+
+ // We cannot do the transform if the operand we are trying to replace
+ // isn't the same as the operand the instruction allows.
+ if (OpNoForForwarding != III.OpNoForForwarding)
+ return false;
+
+ // Check if the instruction we are trying to transform really has
+ // the special zero register as its operand.
+ if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
+ MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
+ return false;
+
+ // This machine instruction is convertible if:
+ // 1. it is summing the operands,
+ // 2. one of the operands is the special zero register, and
+ // 3. the operand we are trying to replace is allowed by the MI.
+ return true;
+}
+
+// Check whether the DefMI is an add instruction and, if so, set the ImmMO
+// and RegMO accordingly.
+bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
+ const ImmInstrInfo &III,
+ MachineOperand *&ImmMO,
+ MachineOperand *&RegMO) const {
+ unsigned Opc = DefMI.getOpcode();
+ if (Opc != PPC::ADDItocL && Opc != PPC::ADDI && Opc != PPC::ADDI8)
+ return false;
+
+ assert(DefMI.getNumOperands() >= 3 &&
+ "Add inst must have at least three operands");
+ RegMO = &DefMI.getOperand(1);
+ ImmMO = &DefMI.getOperand(2);
+
+ // This DefMI is eligible for forwarding if it is:
+ // 1. an add instruction, and
+ // 2. one of its operands is an Imm/CPI/Global.
+ return isAnImmediateOperand(*ImmMO);
+}
+
+bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO,
+ const MachineInstr &DefMI,
+ const MachineInstr &MI,
+ bool KillDefMI
+ ) const {
+ // x = addi y, imm
+ // ...
+ // z = lfdx 0, x -> z = lfd imm(y)
+ // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
+ // of "y" between the DEF of "x" and "z".
+ // The query is only valid post RA.
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ if (MRI.isSSA())
+ return false;
+
+ // MachineInstr::readsRegister only returns true if the machine
+ // instruction reads the exact register or its super-register. It
+ // does not consider uses of sub-registers which seems like strange
+ // behaviour. Nonetheless, if we end up with a 64-bit register here,
+ // get the corresponding 32-bit register to check.
+ unsigned Reg = RegMO.getReg();
+ if (PPC::G8RCRegClass.contains(Reg))
+ Reg = Reg - PPC::X0 + PPC::R0;
+
+ // Walk the instructions in reverse (MI --> DefMI) to find the last DEF of the Reg.
+ MachineBasicBlock::const_reverse_iterator It = MI;
+ MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
+ It++;
+ for (; It != E; ++It) {
+ if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
+ return false;
+ // Made it to DefMI without encountering a clobber.
+ if ((&*It) == &DefMI)
+ break;
+ }
+ assert((&*It) == &DefMI && "DefMI is missing");
+
+ // If DefMI also uses the register to be forwarded, we can only forward it
+ // if DefMI is being erased.
+ if (DefMI.readsRegister(Reg, &getRegisterInfo()))
+ return KillDefMI;
+
+ return true;
+}
+
+bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
+ const MachineInstr &DefMI,
+ const ImmInstrInfo &III,
+ int64_t &Imm) const {
+ assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
+ if (DefMI.getOpcode() == PPC::ADDItocL) {
+ // The operand of ADDItocL is a CPI, which isn't an immediate at compile
+ // time. However, we know it is 16 bits wide and has an alignment of 4.
+ // Check whether the instruction meets those requirements.
+ if (III.ImmMustBeMultipleOf > 4 ||
+ III.TruncateImmTo || III.ImmWidth != 16)
+ return false;
+
+ // Going from an X-Form to a D-Form load means that the displacement must
+ // be not just an immediate but also a multiple of 4 or 16, depending on
+ // the load. A displacement that is only a multiple of, say, 2 cannot be
+ // encoded in a D-Form load. X-Form loads do not have this restriction.
+ if (ImmMO.isGlobal() &&
+ ImmMO.getGlobal()->getAlignment() < III.ImmMustBeMultipleOf)
+ return false;
+
+ return true;
+ }
+
+ if (ImmMO.isImm()) {
+ // It is an Imm; check whether it fits the range.
+ int64_t Immediate = ImmMO.getImm();
+ // Sign-extend to 64-bits.
+ Imm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ?
+ (Immediate | 0xFFFFFFFFFFFF0000) : Immediate;
+
+ if (Imm % III.ImmMustBeMultipleOf)
+ return false;
+ if (III.TruncateImmTo)
+ Imm &= ((1 << III.TruncateImmTo) - 1);
+ if (III.SignedImm) {
+ APInt ActualValue(64, Imm, true);
+ if (!ActualValue.isSignedIntN(III.ImmWidth))
+ return false;
+ } else {
+ uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
+ if ((uint64_t)Imm > UnsignedMax)
+ return false;
+ }
+ }
+ else
+ return false;
+
+ // This ImmMO can be forwarded if it meets the requirements described
+ // in ImmInstrInfo.
+ return true;
+}
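The numeric part of this check reduces to three tests; a self-contained sketch of them (the struct and field names here are invented, only mirroring ImmInstrInfo):

    #include <cassert>
    #include <cstdint>

    struct ImmReq { unsigned Width; unsigned MultipleOf; bool Signed; };

    static bool immFits(int64_t Imm, const ImmReq &R) {
      if (Imm % R.MultipleOf)            // displacement granularity
        return false;
      if (R.Signed) {                    // signed range of Width bits
        int64_t Min = -(1LL << (R.Width - 1));
        int64_t Max = (1LL << (R.Width - 1)) - 1;
        return Imm >= Min && Imm <= Max;
      }
      return (uint64_t)Imm <= (1ULL << R.Width) - 1;
    }

    int main() {
      ImmReq DS{16, 4, true};       // e.g. a DS-Form displacement
      assert(immFits(-32768, DS));  // in range and a multiple of 4
      assert(!immFits(-32772, DS)); // a multiple of 4, but out of range
      assert(!immFits(6, DS));      // in range, but not a multiple of 4
      return 0;
    }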
+
+// If an X-Form instruction is fed by an add-immediate and one of its operands
+// is the literal zero, attempt to forward the source of the add-immediate to
+// the corresponding D-Form instruction with the displacement coming from
+// the immediate being added.
+bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
+ const ImmInstrInfo &III,
+ unsigned OpNoForForwarding,
+ MachineInstr &DefMI,
+ bool KillDefMI) const {
+ // RegMO ImmMO
+ // | |
+ // x = addi reg, imm <----- DefMI
+ // y = op 0 , x <----- MI
+ // |
+ // OpNoForForwarding
+ // Check whether the MI meets the requirements described in the III.
+ if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))
+ return false;
+
+ // Check whether the DefMI meets the requirements described in the III.
+ // If so, set the ImmMO and RegMO accordingly.
+ MachineOperand *ImmMO = nullptr;
+ MachineOperand *RegMO = nullptr;
+ if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
+ return false;
+ assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
+
+ // Now that we have the Imm operand, check whether the ImmMO meets the
+ // requirements described in the III. If so, set the Imm.
+ int64_t Imm = 0;
+ if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
+ return false;
+
+ // Check if the RegMO can be forwarded to MI.
+ if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI))
+ return false;
+
+ // We know that the MI and DefMI both match the pattern and that the Imm
+ // also meets the requirements of the new Imm-form. It is safe to do the
+ // transformation now.
+ LLVM_DEBUG(dbgs() << "Replacing instruction:\n");
+ LLVM_DEBUG(MI.dump());
+ LLVM_DEBUG(dbgs() << "Fed by:\n");
+ LLVM_DEBUG(DefMI.dump());
+
+ // Update the base reg first.
+ MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(),
+ false, false,
+ RegMO->isKill());
+
+ // Then, update the imm.
+ if (ImmMO->isImm()) {
+ // If the ImmMO is an immediate, change the operand holding ZERO to that
+ // immediate directly.
+ replaceInstrOperandWithImm(MI, III.ZeroIsSpecialOrig, Imm);
+ }
+ else {
+ // Otherwise, it is a Constant Pool Index (CPI) or a Global, which is in
+ // fact a relocation. We need to replace the special zero register with
+ // ImmMO.
+ // Before that, we need to fix up the target flags on the immediate; for
+ // some reason the flag is not set on the ImmMO when it is a CPI.
+ if (DefMI.getOpcode() == PPC::ADDItocL)
+ ImmMO->setTargetFlags(PPCII::MO_TOC_LO);
+
+ // MachineInstr has no MI.setOperand(i) interface, only MI.getOperand(i).
+ // To replace the ZERO MachineOperand with ImmMO, we remove the ZERO
+ // operand and all the operands behind it, add the ImmMO, and then add
+ // the remaining operands back.
+ SmallVector<MachineOperand, 2> MOps;
+ for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {
+ MOps.push_back(MI.getOperand(i));
+ MI.RemoveOperand(i);
+ }
+
+ // Remove the last MO in the list, which is ZERO operand in fact.
+ MOps.pop_back();
+ // Add the imm operand.
+ MI.addOperand(*ImmMO);
+ // Now add the rest back.
+ for (auto &MO : MOps)
+ MI.addOperand(MO);
+ }
+
+ // Update the opcode.
+ MI.setDesc(get(III.ImmOpcode));
+
+ LLVM_DEBUG(dbgs() << "With:\n");
+ LLVM_DEBUG(MI.dump());
+
+ return true;
+}
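Why the folding is sound can be seen with plain address arithmetic (a scalar model, not compiler code): with the special zero register in the X-Form, both forms compute the same effective address.

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t y = 0x1000, imm = 16;
      int64_t x = y + imm;       // x = addi y, imm   (DefMI)
      int64_t xformEA = 0 + x;   // z = lfdx 0, x     (X-Form, RA reads as 0)
      int64_t dformEA = y + imm; // z = lfd imm(y)    (D-Form)
      assert(xformEA == dformEA);
      return 0;
    }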
+
+bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
+ const ImmInstrInfo &III,
+ unsigned ConstantOpNo,
+ int64_t Imm) const {
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
bool PostRA = !MRI.isSSA();
// Exit early if we can't convert this.
- if ((ConstantOpNo != III.ConstantOpNo) && !III.IsCommutative)
+ if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
return false;
if (Imm % III.ImmMustBeMultipleOf)
return false;
@@ -3035,7 +3412,7 @@ bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
Opc == PPC::SRW || Opc == PPC::SRWo || Opc == PPC::SRD || Opc == PPC::SRDo;
MI.setDesc(get(III.ImmOpcode));
- if (ConstantOpNo == III.ConstantOpNo) {
+ if (ConstantOpNo == III.OpNoForForwarding) {
// Converting shifts to immediate form is a bit tricky since they may do
// one of three things:
// 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
@@ -3063,42 +3440,47 @@ bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
uint64_t SH = RightShift ? 32 - ShAmt : ShAmt;
uint64_t MB = RightShift ? ShAmt : 0;
uint64_t ME = RightShift ? 31 : 31 - ShAmt;
- MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH);
+ replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH);
MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
.addImm(ME);
} else {
// Left shifts use (N, 63-N), right shifts use (64-N, N).
uint64_t SH = RightShift ? 64 - ShAmt : ShAmt;
uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
- MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH);
+ replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH);
MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
}
}
} else
- MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm);
+ replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
}
// Convert commutative instructions (switch the operands and convert the
// desired one to an immediate).
else if (III.IsCommutative) {
- MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm);
- swapMIOperands(MI, ConstantOpNo, III.ConstantOpNo);
+ replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
+ swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding);
} else
llvm_unreachable("Should have exited early!");
// For instructions for which the constant register replaces a different
// operand than where the immediate goes, we need to swap them.
- if (III.ConstantOpNo != III.ImmOpNo)
- swapMIOperands(MI, III.ConstantOpNo, III.ImmOpNo);
+ if (III.OpNoForForwarding != III.ImmOpNo)
+ swapMIOperands(MI, III.OpNoForForwarding, III.ImmOpNo);
- // If the R0/X0 register is special for the original instruction and not for
- // the new instruction (or vice versa), we need to fix up the register class.
+ // If the special R0/X0 register indexes differ between the original and
+ // the new instruction, we need to fix up the register class in the new
+ // instruction.
if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
- if (!III.ZeroIsSpecialOrig) {
+ if (III.ZeroIsSpecialNew) {
+ // If the operand at III.ZeroIsSpecialNew is a physical register (e.g.
+ // ZERO/ZERO8), there is no need to fix up the register class.
unsigned RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
- const TargetRegisterClass *NewRC =
- MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
- &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
- MRI.setRegClass(RegToModify, NewRC);
+ if (TargetRegisterInfo::isVirtualRegister(RegToModify)) {
+ const TargetRegisterClass *NewRC =
+ MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
+ &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
+ MRI.setRegClass(RegToModify, NewRC);
+ }
}
}
return true;
@@ -3140,6 +3522,7 @@ static bool isSignExtendingOp(const MachineInstr &MI) {
Opcode == PPC::EXTSH || Opcode == PPC::EXTSHo ||
Opcode == PPC::EXTSB8 || Opcode == PPC::EXTSH8 ||
Opcode == PPC::EXTSW || Opcode == PPC::EXTSWo ||
+ Opcode == PPC::SETB || Opcode == PPC::SETB8 ||
Opcode == PPC::EXTSH8_32_64 || Opcode == PPC::EXTSW_32_64 ||
Opcode == PPC::EXTSB8_32_64)
return true;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index ba82f56a2464..7ed558b835af 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -91,8 +91,8 @@ struct ImmInstrInfo {
uint64_t ZeroIsSpecialNew : 3;
// Is the operation commutative?
uint64_t IsCommutative : 1;
- // The operand number to check for load immediate.
- uint64_t ConstantOpNo : 3;
+ // The operand number to check for add-immediate def.
+ uint64_t OpNoForForwarding : 3;
// The operand number for the immediate.
uint64_t ImmOpNo : 3;
// The opcode of the new instruction.
@@ -101,6 +101,8 @@ struct ImmInstrInfo {
uint64_t ImmWidth : 5;
// The immediate should be truncated to N bits.
uint64_t TruncateImmTo : 5;
+ // Is the instruction summing the operands?
+ uint64_t IsSummingOperands : 1;
};
// Information required to convert an instruction to just a materialized
@@ -123,10 +125,42 @@ class PPCInstrInfo : public PPCGenInstrInfo {
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr *> &NewMIs) const;
- bool transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
- unsigned ConstantOpNo, int64_t Imm) const;
- MachineInstr *getConstantDefMI(MachineInstr &MI, unsigned &ConstOp,
- bool &SeenIntermediateUse) const;
+
+ // If the instruction has an imm-form and one of its operands is produced
+ // by an LI, put the imm into the instruction directly and remove the LI
+ // if possible.
+ bool transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III,
+ unsigned ConstantOpNo, int64_t Imm) const;
+ // If the instruction has an imm-form and one of its operands is produced
+ // by an add-immediate, try to transform it when possible.
+ bool transformToImmFormFedByAdd(MachineInstr &MI, const ImmInstrInfo &III,
+ unsigned ConstantOpNo,
+ MachineInstr &DefMI,
+ bool KillDefMI) const;
+ // Try to find whether the instruction 'MI' has an operand that could be
+ // forwarded from an instruction that feeds it. If so, return the Def of
+ // that operand, and set OpNoForForwarding to the operand's index in 'MI'.
+ // If another use of this Def is seen between the Def and the MI,
+ // SeenIntermediateUse becomes 'true'.
+ MachineInstr *getForwardingDefMI(MachineInstr &MI,
+ unsigned &OpNoForForwarding,
+ bool &SeenIntermediateUse) const;
+
+ // Can the user MI have its source at index \p OpNoForForwarding
+ // forwarded from an add-immediate that feeds it?
+ bool isUseMIElgibleForForwarding(MachineInstr &MI, const ImmInstrInfo &III,
+ unsigned OpNoForForwarding) const;
+ bool isDefMIElgibleForForwarding(MachineInstr &DefMI,
+ const ImmInstrInfo &III,
+ MachineOperand *&ImmMO,
+ MachineOperand *&RegMO) const;
+ bool isImmElgibleForForwarding(const MachineOperand &ImmMO,
+ const MachineInstr &DefMI,
+ const ImmInstrInfo &III,
+ int64_t &Imm) const;
+ bool isRegElgibleForForwarding(const MachineOperand &RegMO,
+ const MachineInstr &DefMI,
+ const MachineInstr &MI,
+ bool KillDefMI) const;
const unsigned *getStoreOpcodesForSpillArray() const;
const unsigned *getLoadOpcodesForSpillArray() const;
virtual void anchor();
@@ -158,6 +192,16 @@ public:
bool isXFormMemOp(unsigned Opcode) const {
return get(Opcode).TSFlags & PPCII::XFormMemOp;
}
+ static bool isSameClassPhysRegCopy(unsigned Opcode) {
+ unsigned CopyOpcodes[] =
+ { PPC::OR, PPC::OR8, PPC::FMR, PPC::VOR, PPC::XXLOR, PPC::XXLORf,
+ PPC::XSCPSGNDP, PPC::MCRF, PPC::QVFMR, PPC::QVFMRs, PPC::QVFMRb,
+ PPC::CROR, PPC::EVOR, -1U };
+ for (int i = 0; CopyOpcodes[i] != -1U; i++)
+ if (Opcode == CopyOpcodes[i])
+ return true;
+ return false;
+ }
ScheduleHazardRecognizer *
CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
@@ -369,8 +413,30 @@ public:
bool convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef = nullptr) const;
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
-
- bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III) const;
+ void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo,
+ int64_t Imm) const;
+
+ bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III,
+ bool PostRA) const;
+
+ /// getRegNumForOperand - some operands use different numbering schemes
+ /// for the same registers. For example, a VSX instruction may have any of
+ /// vs0-vs63 allocated whereas an Altivec instruction could only have
+ /// vs32-vs63 allocated (numbered as v0-v31). This function returns the actual
+ /// register number needed for the opcode/operand number combination.
+ /// The operand number argument will be useful when we need to extend this
+ /// to instructions that use both Altivec and VSX numbering (for different
+ /// operands).
+ static unsigned getRegNumForOperand(const MCInstrDesc &Desc, unsigned Reg,
+ unsigned OpNo) {
+ if (Desc.TSFlags & PPCII::UseVSXReg) {
+ if (isVRRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::V0);
+ else if (isVFRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::VF0);
+ }
+ return Reg;
+ }
};
}
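The renumbering in getRegNumForOperand is just the Altivec/VSX aliasing rule; a minimal model (plain indices rather than the PPC:: register enum):

    #include <cassert>

    // v0-v31 (and vf0-vf31) alias the upper half of the 64 VSX registers,
    // i.e. vs32-vs63; mirrors PPC::VSX32 + (Reg - PPC::V0).
    static unsigned vsxNumForAltivec(unsigned VRegIndex /* 0..31 */) {
      return 32 + VRegIndex;
    }

    int main() {
      assert(vsxNumForAltivec(0) == 32);  // v0  -> vs32
      assert(vsxNumForAltivec(31) == 63); // v31 -> vs63
      return 0;
    }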
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 1a43037e4a4b..dd3f1ac79089 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -114,6 +114,10 @@ def SDT_PPCqvlfsb : SDTypeProfile<1, 1, [
SDTCisVec<0>, SDTCisPtrTy<1>
]>;
+def SDT_PPCextswsli : SDTypeProfile<1, 2, [ // extswsli
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisInt<2>
+]>;
+
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
@@ -218,6 +222,8 @@ def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
+def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>;
+
// Move 2 i64 values into a VSX register
def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128",
SDTypeProfile<1, 2,
@@ -1189,77 +1195,76 @@ multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
//===----------------------------------------------------------------------===//
// PowerPC Instruction Definitions.
-// Pseudo-instructions:
+// Pseudo instructions:
let hasCtrlDep = 1 in {
let Defs = [R1], Uses = [R1] in {
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
+def ADJCALLSTACKDOWN : PPCEmitTimePseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
"#ADJCALLSTACKDOWN $amt1 $amt2",
[(callseq_start timm:$amt1, timm:$amt2)]>;
-def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
+def ADJCALLSTACKUP : PPCEmitTimePseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
"#ADJCALLSTACKUP $amt1 $amt2",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
-def UPDATE_VRSAVE : Pseudo<(outs gprc:$rD), (ins gprc:$rS),
+def UPDATE_VRSAVE : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$rS),
"UPDATE_VRSAVE $rD, $rS", []>;
}
let Defs = [R1], Uses = [R1] in
-def DYNALLOC : Pseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC",
+def DYNALLOC : PPCEmitTimePseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC",
[(set i32:$result,
(PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
-def DYNAREAOFFSET : Pseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET",
+def DYNAREAOFFSET : PPCEmitTimePseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET",
[(set i32:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
-let usesCustomInserter = 1, // Expanded after instruction selection.
- PPC970_Single = 1 in {
+let PPC970_Single = 1 in {
// Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes
// because either operand might become the first operand in an isel, and
// that operand cannot be r0.
- def SELECT_CC_I4 : Pseudo<(outs gprc:$dst), (ins crrc:$cond,
+ def SELECT_CC_I4 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins crrc:$cond,
gprc_nor0:$T, gprc_nor0:$F,
i32imm:$BROPC), "#SELECT_CC_I4",
[]>;
- def SELECT_CC_I8 : Pseudo<(outs g8rc:$dst), (ins crrc:$cond,
+ def SELECT_CC_I8 : PPCCustomInserterPseudo<(outs g8rc:$dst), (ins crrc:$cond,
g8rc_nox0:$T, g8rc_nox0:$F,
i32imm:$BROPC), "#SELECT_CC_I8",
[]>;
- def SELECT_CC_F4 : Pseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F,
+ def SELECT_CC_F4 : PPCCustomInserterPseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F,
i32imm:$BROPC), "#SELECT_CC_F4",
[]>;
- def SELECT_CC_F8 : Pseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F,
+ def SELECT_CC_F8 : PPCCustomInserterPseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F,
i32imm:$BROPC), "#SELECT_CC_F8",
[]>;
- def SELECT_CC_F16 : Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
+ def SELECT_CC_F16 : PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
i32imm:$BROPC), "#SELECT_CC_F16",
[]>;
- def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
+ def SELECT_CC_VRRC: PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
i32imm:$BROPC), "#SELECT_CC_VRRC",
[]>;
// SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
// register bit directly.
- def SELECT_I4 : Pseudo<(outs gprc:$dst), (ins crbitrc:$cond,
+ def SELECT_I4 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins crbitrc:$cond,
gprc_nor0:$T, gprc_nor0:$F), "#SELECT_I4",
[(set i32:$dst, (select i1:$cond, i32:$T, i32:$F))]>;
- def SELECT_I8 : Pseudo<(outs g8rc:$dst), (ins crbitrc:$cond,
+ def SELECT_I8 : PPCCustomInserterPseudo<(outs g8rc:$dst), (ins crbitrc:$cond,
g8rc_nox0:$T, g8rc_nox0:$F), "#SELECT_I8",
[(set i64:$dst, (select i1:$cond, i64:$T, i64:$F))]>;
let Predicates = [HasFPU] in {
- def SELECT_F4 : Pseudo<(outs f4rc:$dst), (ins crbitrc:$cond,
+ def SELECT_F4 : PPCCustomInserterPseudo<(outs f4rc:$dst), (ins crbitrc:$cond,
f4rc:$T, f4rc:$F), "#SELECT_F4",
[(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>;
- def SELECT_F8 : Pseudo<(outs f8rc:$dst), (ins crbitrc:$cond,
+ def SELECT_F8 : PPCCustomInserterPseudo<(outs f8rc:$dst), (ins crbitrc:$cond,
f8rc:$T, f8rc:$F), "#SELECT_F8",
[(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>;
- def SELECT_F16 : Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
+ def SELECT_F16 : PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
vrrc:$T, vrrc:$F), "#SELECT_F16",
[(set f128:$dst, (select i1:$cond, f128:$T, f128:$F))]>;
}
- def SELECT_VRRC: Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
+ def SELECT_VRRC: PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
vrrc:$T, vrrc:$F), "#SELECT_VRRC",
[(set v4i32:$dst,
(select i1:$cond, v4i32:$T, v4i32:$F))]>;
@@ -1268,18 +1273,18 @@ let Predicates = [HasFPU] in {
// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
// scavenge a register for it.
let mayStore = 1 in {
-def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F),
+def SPILL_CR : PPCEmitTimePseudo<(outs), (ins crrc:$cond, memri:$F),
"#SPILL_CR", []>;
-def SPILL_CRBIT : Pseudo<(outs), (ins crbitrc:$cond, memri:$F),
+def SPILL_CRBIT : PPCEmitTimePseudo<(outs), (ins crbitrc:$cond, memri:$F),
"#SPILL_CRBIT", []>;
}
// RESTORE_CR - Indicate that we're restoring the CR register (previously
// spilled), so we'll need to scavenge a register for it.
let mayLoad = 1 in {
-def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F),
+def RESTORE_CR : PPCEmitTimePseudo<(outs crrc:$cond), (ins memri:$F),
"#RESTORE_CR", []>;
-def RESTORE_CRBIT : Pseudo<(outs crbitrc:$cond), (ins memri:$F),
+def RESTORE_CRBIT : PPCEmitTimePseudo<(outs crbitrc:$cond), (ins memri:$F),
"#RESTORE_CRBIT", []>;
}
@@ -1305,10 +1310,10 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
}
let Defs = [LR] in
- def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>,
+ def MovePCtoLR : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR", []>,
PPC970_Unit_BRU;
let Defs = [LR] in
- def MoveGOTtoLR : Pseudo<(outs), (ins), "#MoveGOTtoLR", []>,
+ def MoveGOTtoLR : PPCEmitTimePseudo<(outs), (ins), "#MoveGOTtoLR", []>,
PPC970_Unit_BRU;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
@@ -1506,19 +1511,19 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
}
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNdi :Pseudo< (outs),
+def TCRETURNdi :PPCEmitTimePseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
"#TC_RETURNd $dst $offset",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNai :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
+def TCRETURNai :PPCEmitTimePseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
"#TC_RETURNa $func $offset",
[(PPCtc_return (i32 imm:$func), imm:$offset)]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
+def TCRETURNri : PPCEmitTimePseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
"#TC_RETURNr $dst $offset",
[]>;
@@ -1544,14 +1549,19 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
}
-let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+// While longjmp is a control-flow barrier (fallthrough isn't allowed), setjmp
+// is not.
+let hasSideEffects = 1 in {
let Defs = [CTR] in
- def EH_SjLj_SetJmp32 : Pseudo<(outs gprc:$dst), (ins memr:$buf),
+ def EH_SjLj_SetJmp32 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf),
"#EH_SJLJ_SETJMP32",
[(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
Requires<[In32BitMode]>;
+}
+
+let hasSideEffects = 1, isBarrier = 1 in {
let isTerminator = 1 in
- def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf),
+ def EH_SjLj_LongJmp32 : PPCCustomInserterPseudo<(outs), (ins memr:$buf),
"#EH_SJLJ_LONGJMP32",
[(PPCeh_sjlj_longjmp addr:$buf)]>,
Requires<[In32BitMode]>;
@@ -1561,7 +1571,7 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
// a terminator. Size is set to 0 to prevent the builtin assembler
// from emitting it.
let isBranch = 1, isTerminator = 1, Size = 0 in {
- def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst),
+ def EH_SjLj_Setup : PPCEmitTimePseudo<(outs), (ins directbrtarget:$dst),
"#EH_SjLj_Setup\t$dst", []>;
}
@@ -1648,119 +1658,117 @@ def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)),
// clean this up in PPCMIPeephole with calls to
// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them
// in the first place.
-let usesCustomInserter = 1 in {
- let Defs = [CR0] in {
- def ATOMIC_LOAD_ADD_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8",
- [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_SUB_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8",
- [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_AND_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8",
- [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_OR_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8",
- [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_XOR_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "ATOMIC_LOAD_XOR_I8",
- [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_NAND_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8",
- [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MIN_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I8",
- [(set i32:$dst, (atomic_load_min_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MAX_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I8",
- [(set i32:$dst, (atomic_load_max_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMIN_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I8",
- [(set i32:$dst, (atomic_load_umin_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMAX_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I8",
- [(set i32:$dst, (atomic_load_umax_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_ADD_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16",
- [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_SUB_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16",
- [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_AND_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16",
- [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_OR_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16",
- [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_XOR_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16",
- [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_NAND_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16",
- [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MIN_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I16",
- [(set i32:$dst, (atomic_load_min_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MAX_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I16",
- [(set i32:$dst, (atomic_load_max_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMIN_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I16",
- [(set i32:$dst, (atomic_load_umin_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMAX_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I16",
- [(set i32:$dst, (atomic_load_umax_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_ADD_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32",
- [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_SUB_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32",
- [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_AND_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32",
- [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_OR_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32",
- [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_XOR_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32",
- [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_NAND_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32",
- [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MIN_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I32",
- [(set i32:$dst, (atomic_load_min_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MAX_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I32",
- [(set i32:$dst, (atomic_load_max_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMIN_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I32",
- [(set i32:$dst, (atomic_load_umin_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMAX_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I32",
- [(set i32:$dst, (atomic_load_umax_32 xoaddr:$ptr, i32:$incr))]>;
-
- def ATOMIC_CMP_SWAP_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8",
- [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
- def ATOMIC_CMP_SWAP_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
- [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
- def ATOMIC_CMP_SWAP_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
- [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
-
- def ATOMIC_SWAP_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_i8",
- [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
- def ATOMIC_SWAP_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16",
- [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
- def ATOMIC_SWAP_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32",
- [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
- }
+let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8",
+ [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8",
+ [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8",
+ [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8",
+ [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "ATOMIC_LOAD_XOR_I8",
+ [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8",
+ [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MIN_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I8",
+ [(set i32:$dst, (atomic_load_min_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MAX_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I8",
+ [(set i32:$dst, (atomic_load_max_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMIN_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I8",
+ [(set i32:$dst, (atomic_load_umin_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMAX_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I8",
+ [(set i32:$dst, (atomic_load_umax_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_ADD_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16",
+ [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16",
+ [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16",
+ [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16",
+ [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16",
+ [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16",
+ [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MIN_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I16",
+ [(set i32:$dst, (atomic_load_min_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MAX_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I16",
+ [(set i32:$dst, (atomic_load_max_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMIN_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I16",
+ [(set i32:$dst, (atomic_load_umin_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMAX_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I16",
+ [(set i32:$dst, (atomic_load_umax_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_ADD_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32",
+ [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32",
+ [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32",
+ [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32",
+ [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32",
+ [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32",
+ [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MIN_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I32",
+ [(set i32:$dst, (atomic_load_min_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MAX_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I32",
+ [(set i32:$dst, (atomic_load_max_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMIN_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I32",
+ [(set i32:$dst, (atomic_load_umin_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMAX_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I32",
+ [(set i32:$dst, (atomic_load_umax_32 xoaddr:$ptr, i32:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8",
+ [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
+ [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
+ [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
+
+ def ATOMIC_SWAP_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_i8",
+ [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
+ def ATOMIC_SWAP_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16",
+ [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
+ def ATOMIC_SWAP_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32",
+ [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
}
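A note on what these pseudos become, since PPCCustomInserterPseudo (per the rest
of this diff) simply folds usesCustomInserter = 1 into the class: at MI emission
each atomic pseudo is expanded into a load-reserve/store-conditional retry loop,
and it is the store-conditional (stwcx.) that records success in CR0 -- which is
why the whole block sits under "let Defs = [CR0]". Below is a minimal C++ sketch
of the semantics of ATOMIC_LOAD_ADD_I32, using GCC/Clang atomic builtins rather
than the actual custom-inserter API; the function name is illustrative only:

    #include <cstdint>

    int32_t atomic_load_add_i32(int32_t *ptr, int32_t incr) {
      int32_t old = __atomic_load_n(ptr, __ATOMIC_RELAXED);
      // Retry until the store-conditional succeeds; a weak CAS models the
      // lwarx/stwcx. pair losing its reservation.
      while (!__atomic_compare_exchange_n(ptr, &old, old + incr,
                                          /*weak=*/true, __ATOMIC_SEQ_CST,
                                          __ATOMIC_RELAXED))
        ;
      return old; // $dst receives the value observed before the add
    }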
def : Pat<(PPCatomicCmpSwap_8 xoaddr:$ptr, i32:$old, i32:$new),
@@ -1988,15 +1996,15 @@ def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
// Unindexed (r+i) Stores.
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
-def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src),
- "stb $rS, $src", IIC_LdStStore,
- [(truncstorei8 i32:$rS, iaddr:$src)]>;
-def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$src),
- "sth $rS, $src", IIC_LdStStore,
- [(truncstorei16 i32:$rS, iaddr:$src)]>;
-def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src),
- "stw $rS, $src", IIC_LdStStore,
- [(store i32:$rS, iaddr:$src)]>;
+def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$dst),
+ "stb $rS, $dst", IIC_LdStStore,
+ [(truncstorei8 i32:$rS, iaddr:$dst)]>;
+def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$dst),
+ "sth $rS, $dst", IIC_LdStStore,
+ [(truncstorei16 i32:$rS, iaddr:$dst)]>;
+def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$dst),
+ "stw $rS, $dst", IIC_LdStStore,
+ [(store i32:$rS, iaddr:$dst)]>;
let Predicates = [HasFPU] in {
def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst),
"stfs $rS, $dst", IIC_LdStSTFD,
@@ -2010,13 +2018,13 @@ def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst),
// Unindexed (r+i) Stores with Update (preinc).
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "stbu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stbu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "sthu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "sthu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "stwu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stwu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
let Predicates = [HasFPU] in {
def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst),
@@ -2084,19 +2092,19 @@ def STFDX : XForm_28_memOp<31, 727, (outs), (ins f8rc:$frS, memrr:$dst),
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STBUX : XForm_8_memOp<31, 247, (outs ptr_rc_nor0:$ea_res),
(ins gprc:$rS, memrr:$dst),
- "stbux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stbux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STHUX : XForm_8_memOp<31, 439, (outs ptr_rc_nor0:$ea_res),
(ins gprc:$rS, memrr:$dst),
- "sthux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "sthux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STWUX : XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res),
(ins gprc:$rS, memrr:$dst),
- "stwux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stwux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
@@ -2543,8 +2551,8 @@ def MTPMR : XFXForm_1<31, 462, (outs), (ins i32imm:$SPR, gprc:$RT),
// A pseudo-instruction used to implement the read of the 64-bit cycle counter
// on a 32-bit target.
-let hasSideEffects = 1, usesCustomInserter = 1 in
-def ReadTB : Pseudo<(outs gprc:$lo, gprc:$hi), (ins),
+let hasSideEffects = 1 in
+def ReadTB : PPCCustomInserterPseudo<(outs gprc:$lo, gprc:$hi), (ins),
"#ReadTB", []>;
let Uses = [CTR] in {
@@ -2603,13 +2611,13 @@ def : InstAlias<"mfvrsave $rS", (MFVRSAVE gprc:$rS)>;
// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register,
// so we'll need to scavenge a register for it.
let mayStore = 1 in
-def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
+def SPILL_VRSAVE : PPCEmitTimePseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
"#SPILL_VRSAVE", []>;
// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously
// spilled), so we'll need to scavenge a register for it.
let mayLoad = 1 in
-def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
+def RESTORE_VRSAVE : PPCEmitTimePseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
"#RESTORE_VRSAVE", []>;
let hasSideEffects = 0 in {
@@ -2648,9 +2656,9 @@ def MCRXRX : X_BF3<31, 576, (outs crrc:$BF), (ins),
} // hasSideEffects = 0
let Predicates = [HasFPU] in {
-// Pseudo instruction to perform FADD in round-to-zero mode.
-let usesCustomInserter = 1, Uses = [RM] in {
- def FADDrtz: Pseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "",
+// Custom inserter instruction to perform FADD in round-to-zero mode.
+let Uses = [RM] in {
+ def FADDrtz: PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "",
[(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
}
@@ -3022,23 +3030,23 @@ def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
(ADDIS $in, tblockaddress:$g)>;
// Support for thread-local storage.
-def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
+def PPC32GOT: PPCEmitTimePseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
[(set i32:$rD, (PPCppc32GOT))]>;
// Get the _GLOBAL_OFFSET_TABLE_ in PIC mode.
// This uses two output registers, the first as the real output, the second as a
// temporary register, used internally in code generation.
-def PPC32PICGOT: Pseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
+def PPC32PICGOT: PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
[]>, NoEncode<"$rT">;
-def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
+def LDgotTprelL32: PPCEmitTimePseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
"#LDgotTprelL32",
[(set i32:$rD,
(PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g),
(ADD4TLS $in, tglobaltlsaddr:$g)>;
-def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+def ADDItlsgdL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDItlsgdL32",
[(set i32:$rD,
(PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
@@ -3046,7 +3054,7 @@ def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
// explicitly defined when this op is created, so not mentioned here.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+def GETtlsADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
"GETtlsADDR32",
[(set i32:$rD,
(PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>;
@@ -3054,14 +3062,14 @@ def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
// are true defines while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
-def ADDItlsgdLADDR32 : Pseudo<(outs gprc:$rD),
+def ADDItlsgdLADDR32 : PPCEmitTimePseudo<(outs gprc:$rD),
(ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
"#ADDItlsgdLADDR32",
[(set i32:$rD,
(PPCaddiTlsgdLAddr i32:$reg,
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>;
-def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+def ADDItlsldL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDItlsldL32",
[(set i32:$rD,
(PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
@@ -3069,7 +3077,7 @@ def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
// explicitly defined when this op is created, so not mentioned here.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+def GETtlsldADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
"GETtlsldADDR32",
[(set i32:$rD,
(PPCgetTlsldAddr i32:$reg,
@@ -3078,31 +3086,31 @@ def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
// are true defines while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
-def ADDItlsldLADDR32 : Pseudo<(outs gprc:$rD),
+def ADDItlsldLADDR32 : PPCEmitTimePseudo<(outs gprc:$rD),
(ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
"#ADDItlsldLADDR32",
[(set i32:$rD,
(PPCaddiTlsldLAddr i32:$reg,
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>;
-def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+def ADDIdtprelL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDIdtprelL32",
[(set i32:$rD,
(PPCaddiDtprelL i32:$reg, tglobaltlsaddr:$disp))]>;
-def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+def ADDISdtprelHA32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDISdtprelHA32",
[(set i32:$rD,
(PPCaddisDtprelHA i32:$reg,
tglobaltlsaddr:$disp))]>;
// Support for Position-independent code
-def LWZtoc : Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
+def LWZtoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
"#LWZtoc",
[(set i32:$rD,
(PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
// Get Global (GOT) Base Register offset, from the word immediately preceding
// the function label.
-def UpdateGBR : Pseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
+def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
// Standard shifts. These are represented separately from the real shifts above
@@ -3930,21 +3938,19 @@ def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGT)),
def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)),
(SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-let usesCustomInserter = 1 in {
-def ANDIo_1_EQ_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in),
+def ANDIo_1_EQ_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in),
"#ANDIo_1_EQ_BIT",
[(set i1:$dst, (trunc (not i32:$in)))]>;
-def ANDIo_1_GT_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in),
+def ANDIo_1_GT_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in),
"#ANDIo_1_GT_BIT",
[(set i1:$dst, (trunc i32:$in))]>;
-def ANDIo_1_EQ_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+def ANDIo_1_EQ_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in),
"#ANDIo_1_EQ_BIT8",
[(set i1:$dst, (trunc (not i64:$in)))]>;
-def ANDIo_1_GT_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+def ANDIo_1_GT_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in),
"#ANDIo_1_GT_BIT8",
[(set i1:$dst, (trunc i64:$in))]>;
-}
def : Pat<(i1 (not (trunc i32:$in))),
(ANDIo_1_EQ_BIT $in)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
index c4bb02695b36..ef589ad01fd7 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
@@ -245,32 +245,30 @@ let Uses = [RM] in {
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
- let usesCustomInserter = 1 in {
- def SELECT_CC_QFRC: Pseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QFRC",
- []>;
- def SELECT_CC_QSRC: Pseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QSRC",
- []>;
- def SELECT_CC_QBRC: Pseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QBRC",
- []>;
-
- // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
- // register bit directly.
- def SELECT_QFRC: Pseudo<(outs qfrc:$dst), (ins crbitrc:$cond,
- qfrc:$T, qfrc:$F), "#SELECT_QFRC",
- [(set v4f64:$dst,
- (select i1:$cond, v4f64:$T, v4f64:$F))]>;
- def SELECT_QSRC: Pseudo<(outs qsrc:$dst), (ins crbitrc:$cond,
- qsrc:$T, qsrc:$F), "#SELECT_QSRC",
- [(set v4f32:$dst,
- (select i1:$cond, v4f32:$T, v4f32:$F))]>;
- def SELECT_QBRC: Pseudo<(outs qbrc:$dst), (ins crbitrc:$cond,
- qbrc:$T, qbrc:$F), "#SELECT_QBRC",
- [(set v4i1:$dst,
- (select i1:$cond, v4i1:$T, v4i1:$F))]>;
- }
+ def SELECT_CC_QFRC: PPCCustomInserterPseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QFRC",
+ []>;
+ def SELECT_CC_QSRC: PPCCustomInserterPseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QSRC",
+ []>;
+ def SELECT_CC_QBRC: PPCCustomInserterPseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QBRC",
+ []>;
+
+ // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
+ // register bit directly.
+ def SELECT_QFRC: PPCCustomInserterPseudo<(outs qfrc:$dst), (ins crbitrc:$cond,
+ qfrc:$T, qfrc:$F), "#SELECT_QFRC",
+ [(set v4f64:$dst,
+ (select i1:$cond, v4f64:$T, v4f64:$F))]>;
+ def SELECT_QSRC: PPCCustomInserterPseudo<(outs qsrc:$dst), (ins crbitrc:$cond,
+ qsrc:$T, qsrc:$F), "#SELECT_QSRC",
+ [(set v4f32:$dst,
+ (select i1:$cond, v4f32:$T, v4f32:$F))]>;
+ def SELECT_QBRC: PPCCustomInserterPseudo<(outs qbrc:$dst), (ins crbitrc:$cond,
+ qbrc:$T, qbrc:$F), "#SELECT_QBRC",
+ [(set v4i1:$dst,
+ (select i1:$cond, v4i1:$T, v4i1:$F))]>;
// Convert and Round Instructions
def QVFCTID : QPXX19_Int<4, 814, "qvfctid", int_ppc_qpx_qvfctid>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td
index 96649efdc1bc..9f5891a45f22 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td
@@ -831,22 +831,20 @@ def : Pat<(f64 (fpextend f32:$src)),
}
let Predicates = [HasSPE] in {
- let usesCustomInserter = 1 in {
-def SELECT_CC_SPE4 : Pseudo<(outs spe4rc:$dst),
+def SELECT_CC_SPE4 : PPCCustomInserterPseudo<(outs spe4rc:$dst),
(ins crrc:$cond, spe4rc:$T, spe4rc:$F,
i32imm:$BROPC), "#SELECT_CC_SPE4",
[]>;
-def SELECT_CC_SPE : Pseudo<(outs sperc:$dst),
+def SELECT_CC_SPE : PPCCustomInserterPseudo<(outs sperc:$dst),
(ins crrc:$cond, sperc:$T, sperc:$F, i32imm:$BROPC),
"#SELECT_CC_SPE",
[]>;
-def SELECT_SPE4 : Pseudo<(outs spe4rc:$dst), (ins crbitrc:$cond,
+def SELECT_SPE4 : PPCCustomInserterPseudo<(outs spe4rc:$dst), (ins crbitrc:$cond,
spe4rc:$T, spe4rc:$F), "#SELECT_SPE4",
[(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>;
-def SELECT_SPE : Pseudo<(outs sperc:$dst), (ins crbitrc:$cond,
+def SELECT_SPE : PPCCustomInserterPseudo<(outs sperc:$dst), (ins crbitrc:$cond,
sperc:$T, sperc:$F), "#SELECT_SPE",
[(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>;
- }
def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
(SELECT_SPE4 (CRANDC $lhs, $rhs), $tval, $fval)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 781a3277441a..0f073388dc74 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -67,6 +67,10 @@ def SDT_PPCxxswapd : SDTypeProfile<1, 1, [
def SDTVecConv : SDTypeProfile<1, 2, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>
]>;
+def SDTVabsd : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32>
+]>;
+
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -79,6 +83,7 @@ def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>;
def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
+def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>;
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
string asmstr, InstrItinClass itin, Intrinsic Int,
@@ -132,7 +137,7 @@ let Uses = [RM] in {
[]>;
// Pseudo instruction XFLOADf64 will be expanded to LXSDX or LFDX later
- let isPseudo = 1, CodeSize = 3 in
+ let CodeSize = 3 in
def XFLOADf64 : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
"#XFLOADf64",
[(set f64:$XT, (load xoaddr:$src))]>;
@@ -163,7 +168,7 @@ let Uses = [RM] in {
[]>;
// Pseudo instruction XFSTOREf64 will be expanded to STXSDX or STFDX later
- let isPseudo = 1, CodeSize = 3 in
+ let CodeSize = 3 in
def XFSTOREf64 : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
"#XFSTOREf64",
[(store f64:$XT, xoaddr:$dst)]>;
@@ -898,37 +903,36 @@ let Uses = [RM] in {
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
-let usesCustomInserter = 1, // Expanded after instruction selection.
- PPC970_Single = 1 in {
+let PPC970_Single = 1 in {
- def SELECT_CC_VSRC: Pseudo<(outs vsrc:$dst),
+ def SELECT_CC_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
(ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC),
"#SELECT_CC_VSRC",
[]>;
- def SELECT_VSRC: Pseudo<(outs vsrc:$dst),
+ def SELECT_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
(ins crbitrc:$cond, vsrc:$T, vsrc:$F),
"#SELECT_VSRC",
[(set v2f64:$dst,
(select i1:$cond, v2f64:$T, v2f64:$F))]>;
- def SELECT_CC_VSFRC: Pseudo<(outs f8rc:$dst),
+ def SELECT_CC_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
(ins crrc:$cond, f8rc:$T, f8rc:$F,
i32imm:$BROPC), "#SELECT_CC_VSFRC",
[]>;
- def SELECT_VSFRC: Pseudo<(outs f8rc:$dst),
+ def SELECT_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
(ins crbitrc:$cond, f8rc:$T, f8rc:$F),
"#SELECT_VSFRC",
[(set f64:$dst,
(select i1:$cond, f64:$T, f64:$F))]>;
- def SELECT_CC_VSSRC: Pseudo<(outs f4rc:$dst),
+ def SELECT_CC_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
(ins crrc:$cond, f4rc:$T, f4rc:$F,
i32imm:$BROPC), "#SELECT_CC_VSSRC",
[]>;
- def SELECT_VSSRC: Pseudo<(outs f4rc:$dst),
+ def SELECT_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
(ins crbitrc:$cond, f4rc:$T, f4rc:$F),
"#SELECT_VSSRC",
[(set f32:$dst,
(select i1:$cond, f32:$T, f32:$F))]>;
-} // usesCustomInserter
+}
} // AddedComplexity
def : InstAlias<"xvmovdp $XT, $XB",
@@ -1040,17 +1044,14 @@ def : Pat<(v2f64 (bitconvert v1i128:$A)),
def : Pat<(v1i128 (bitconvert v2f64:$A)),
(COPY_TO_REGCLASS $A, VRRC)>;
-// sign extension patterns
-// To extend "in place" from v2i32 to v2i64, we have input data like:
-// | undef | i32 | undef | i32 |
-// but xvcvsxwdp expects the input in big-Endian format:
-// | i32 | undef | i32 | undef |
-// so we need to shift everything to the left by one i32 (word) before
-// the conversion.
-def : Pat<(sext_inreg v2i64:$C, v2i32),
- (XVCVDPSXDS (XVCVSXWDP (XXSLDWI $C, $C, 1)))>;
-def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))),
- (XVCVSXWDP (XXSLDWI $C, $C, 1))>;
+def : Pat<(v2i64 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)),
(v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>;
@@ -1069,10 +1070,6 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
// Stores.
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
(STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
- (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
- (STXVW4X $rS, xoaddr:$dst)>;
def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
}
let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
@@ -1159,6 +1156,26 @@ def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
(XVRSQRTEDP $A)>;
+// Vector selection
+def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
+ (COPY_TO_REGCLASS
+ (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
+ (COPY_TO_REGCLASS $vB, VSRC),
+ (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
+ (COPY_TO_REGCLASS
+ (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
+ (COPY_TO_REGCLASS $vB, VSRC),
+ (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(vselect v4i32:$vA, v4i32:$vB, v4i32:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v2i64:$vA, v2i64:$vB, v2i64:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+
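The operand order in these XXSEL patterns is easy to misread: xxsel is a pure
bitwise select, and with (XXSEL $vC, $vB, $vA) the mask $vA takes bits from $vB
where set and from $vC where clear, which is exactly vselect(mask, tval, fval).
A one-lane scalar sketch of the semantics:

    #include <cstdint>

    // (XXSEL fval, tval, mask) per 32-bit lane, written as plain C++.
    uint32_t xxsel_word(uint32_t fval, uint32_t tval, uint32_t mask) {
      return (fval & ~mask) | (tval & mask);
    }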
let Predicates = [IsLittleEndian] in {
def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
(f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
@@ -1200,6 +1217,27 @@ def ScalarLoads {
dag Li32 = (i32 (load xoaddr:$src));
}
+def DWToSPExtractConv {
+ dag El0US1 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+ dag El1US1 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+ dag El0US2 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+ dag El1US2 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+ dag El0SS1 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+ dag El1SS1 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+ dag El0SS2 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+ dag El1SS2 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+ dag BVU = (v4f32 (build_vector El0US1, El1US1, El0US2, El1US2));
+ dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
+}
+
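The helper above only names dags for reuse: El{0,1}{US,SS}{1,2} extract one
doubleword of $S1 or $S2 and convert it to f32 (US = unsigned via PPCfcfidus,
SS = signed via PPCfcfids), and BVU/BVS pack the four conversions into a v4f32.
A scalar sketch of what BVU denotes, assuming the default rounding mode:

    #include <cstdint>

    // DWToSPExtractConv.BVU, written out element by element.
    void bvu(const uint64_t s1[2], const uint64_t s2[2], float out[4]) {
      out[0] = (float)s1[0]; // El0US1
      out[1] = (float)s1[1]; // El1US1
      out[2] = (float)s2[0]; // El0US2
      out[3] = (float)s2[1]; // El1US2
    }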
// The following VSX instructions were introduced in Power ISA 2.07
/* FIXME: if the operands are v2i64, these patterns will not match.
we should define new patterns or otherwise match the same patterns
@@ -1241,23 +1279,19 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def LXSIWZX : XX1Form_memOp<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
"lxsiwzx $XT, $src", IIC_LdStLFD, []>;
- // Please note let isPseudo = 1 is not part of class Pseudo<>. Missing it
- // would cause these Pseudos are not expanded in expandPostRAPseudos()
- let isPseudo = 1 in {
- // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later
- let CodeSize = 3 in
- def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src),
- "#XFLOADf32",
- [(set f32:$XT, (load xoaddr:$src))]>;
- // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later
- def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
- "#LIWAX",
- [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
- // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later
- def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
- "#LIWZX",
- [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
- }
+ // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later
+ let CodeSize = 3 in
+ def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src),
+ "#XFLOADf32",
+ [(set f32:$XT, (load xoaddr:$src))]>;
+ // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later
+ def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
+ "#LIWAX",
+ [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
+ // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later
+ def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
+ "#LIWZX",
+ [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
} // mayLoad
// VSX scalar stores introduced in ISA 2.07
@@ -1268,19 +1302,15 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def STXSIWX : XX1Form_memOp<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
"stxsiwx $XT, $dst", IIC_LdStSTFD, []>;
- // Please note let isPseudo = 1 is not part of class Pseudo<>. Missing it
- // would cause these Pseudos are not expanded in expandPostRAPseudos()
- let isPseudo = 1 in {
- // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later
- let CodeSize = 3 in
- def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst),
- "#XFSTOREf32",
- [(store f32:$XT, xoaddr:$dst)]>;
- // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later
- def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
- "#STIWX",
- [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
- }
+ // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later
+ let CodeSize = 3 in
+ def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst),
+ "#XFSTOREf32",
+ [(store f32:$XT, xoaddr:$dst)]>;
+ // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later
+ def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
+ "#STIWX",
+ [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
} // mayStore
} // UseVSXReg = 1
@@ -1443,35 +1473,27 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
} // UseVSXReg = 1
let Predicates = [IsLittleEndian] in {
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
+ def : Pat<DWToSPExtractConv.El0SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1SS1,
(f32 (XSCVSXDSP (COPY_TO_REGCLASS
- (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
+ (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El0US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1US1,
(f32 (XSCVUXDSP (COPY_TO_REGCLASS
- (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+ (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
}
let Predicates = [IsBigEndian] in {
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El0SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El0US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
}
// Instructions for converting float to i64 feeding a store.
@@ -1993,6 +2015,10 @@ let Predicates = [IsLittleEndian, HasVSX] in
def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
(f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
+def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
+ (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
+ (STXVW4X $rS, xoaddr:$dst)>;
def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
@@ -2671,6 +2697,9 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
"xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>;
+ def : Pat<(f128 (int_ppc_scalar_insert_exp_qp f128:$vA, i64:$vB)),
+ (f128 (XSIEXPQP $vA, (MTVSRD $vB)))>;
+
// Extract Exponent/Significand DP/QP
def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>;
def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>;
@@ -2678,6 +2707,10 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>;
def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>;
+ def : Pat<(i64 (int_ppc_scalar_extract_expq f128:$vA)),
+ (i64 (MFVSRD (EXTRACT_SUBREG
+ (v2i64 (XSXEXPQP $vA)), sub_64)))>;
+
// Vector Insert Word
let UseVSXReg = 1 in {
// XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
@@ -3238,20 +3271,19 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(f64 (PPCVexts f64:$A, 2)),
(f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
- let isPseudo = 1 in {
- def DFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrix:$src),
- "#DFLOADf32",
- [(set f32:$XT, (load ixaddr:$src))]>;
- def DFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrix:$src),
- "#DFLOADf64",
- [(set f64:$XT, (load ixaddr:$src))]>;
- def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst),
- "#DFSTOREf32",
- [(store f32:$XT, ixaddr:$dst)]>;
- def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
- "#DFSTOREf64",
- [(store f64:$XT, ixaddr:$dst)]>;
- }
+ def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src),
+ "#DFLOADf32",
+ [(set f32:$XT, (load ixaddr:$src))]>;
+ def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src),
+ "#DFLOADf64",
+ [(set f64:$XT, (load ixaddr:$src))]>;
+ def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst),
+ "#DFSTOREf32",
+ [(store f32:$XT, ixaddr:$dst)]>;
+ def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
+ "#DFSTOREf64",
+ [(store f64:$XT, ixaddr:$dst)]>;
+
def : Pat<(f64 (extloadf32 ixaddr:$src)),
(COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>;
def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))),
@@ -3533,22 +3565,20 @@ let AddedComplexity = 400 in {
}
let Predicates = [HasP9Vector] in {
- let isPseudo = 1 in {
- let mayStore = 1 in {
- def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
- (ins spilltovsrrc:$XT, memrr:$dst),
- "#SPILLTOVSR_STX", []>;
- def SPILLTOVSR_ST : Pseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
- "#SPILLTOVSR_ST", []>;
- }
- let mayLoad = 1 in {
- def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT),
- (ins memrr:$src),
- "#SPILLTOVSR_LDX", []>;
- def SPILLTOVSR_LD : Pseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
- "#SPILLTOVSR_LD", []>;
+ let mayStore = 1 in {
+ def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
+ (ins spilltovsrrc:$XT, memrr:$dst),
+ "#SPILLTOVSR_STX", []>;
+ def SPILLTOVSR_ST : PPCPostRAExpPseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
+ "#SPILLTOVSR_ST", []>;
+ }
+ let mayLoad = 1 in {
+ def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT),
+ (ins memrr:$src),
+ "#SPILLTOVSR_LDX", []>;
+ def SPILLTOVSR_LD : PPCPostRAExpPseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
+ "#SPILLTOVSR_LD", []>;
- }
}
}
// Integer extend helper dags 32 -> 64
@@ -3797,6 +3827,15 @@ let AddedComplexity = 400 in {
(XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
}
+ let Predicates = [IsBigEndian, HasP8Vector] in {
+ def : Pat<DWToSPExtractConv.BVU,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
+ (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3)))>;
+ def : Pat<DWToSPExtractConv.BVS,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
+ (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
+ }
+
// Big endian, available on all targets with VSX
let Predicates = [IsBigEndian, HasVSX] in {
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3825,6 +3864,15 @@ let AddedComplexity = 400 in {
(v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
}
+ let Predicates = [IsLittleEndian, HasP8Vector] in {
+ def : Pat<DWToSPExtractConv.BVU,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3),
+ (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3)))>;
+ def : Pat<DWToSPExtractConv.BVS,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
+ (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
+ }
+
let Predicates = [IsLittleEndian, HasVSX] in {
// Little endian, available on all targets with VSX
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3869,10 +3917,11 @@ let AddedComplexity = 400 in {
(COPY_TO_REGCLASS (MTVSRD $A), VSRC),
(COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC),
- (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), 0),
- (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC),
- (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), 0))>;
+ (XXPERMDI
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC),
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
(XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
}
@@ -3884,10 +3933,11 @@ let AddedComplexity = 400 in {
(COPY_TO_REGCLASS (MTVSRD $B), VSRC),
(COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC),
- (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), 0),
- (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC),
- (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 0))>;
+ (XXPERMDI
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC),
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
(XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
}
@@ -3940,10 +3990,9 @@ let AddedComplexity = 400 in {
def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
(v2i64 (MTVSRDD $rB, $rA))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (VMRGOW
- (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.A, AnyExts.C), VSRC)),
- (v4i32
- (COPY_TO_REGCLASS (MTVSRDD AnyExts.B, AnyExts.D), VSRC)))>;
+ (MTVSRDD
+ (RLDIMI AnyExts.B, AnyExts.A, 32, 0),
+ (RLDIMI AnyExts.D, AnyExts.C, 32, 0))>;
}
let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
@@ -3953,10 +4002,9 @@ let AddedComplexity = 400 in {
def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
(v2i64 (MTVSRDD $rB, $rA))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (VMRGOW
- (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC)),
- (v4i32
- (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC)))>;
+ (MTVSRDD
+ (RLDIMI AnyExts.C, AnyExts.D, 32, 0),
+ (RLDIMI AnyExts.A, AnyExts.B, 32, 0))>;
}
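In the build_vector hunks above, the old VMRGOW-based word merges are replaced
by packing word pairs in GPRs first: rldimi rB, rA, 32, 0 rotates rA left by 32
and inserts it over bits 0-31 (IBM bit numbering), so rA's low word lands in the
high half of the result while rB's low word is preserved. A sketch of the
packing, with the two register operands as C++ parameters:

    #include <cstdint>

    // Effect of "rldimi rB, rA, 32, 0" on the destination register rB.
    uint64_t rldimi_32_0(uint64_t rB, uint64_t rA) {
      return ((rA & 0xffffffffu) << 32) | (rB & 0xffffffffu);
    }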
// P9 Altivec instructions that can be used to build vectors.
// Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
@@ -4005,3 +4053,21 @@ let AddedComplexity = 400 in {
}
}
+// Put these P9 Altivec patterns here (in the VSX file) because they can be
+// selected to the VSX instruction xvnegsp; this avoids a possible undef.
+let Predicates = [HasP9Altivec] in {
+
+ def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 0))),
+ (v4i32 (VABSDUW $A, $B))>;
+
+ def : Pat<(v8i16 (PPCvabsd v8i16:$A, v8i16:$B, (i32 0))),
+ (v8i16 (VABSDUH $A, $B))>;
+
+ def : Pat<(v16i8 (PPCvabsd v16i8:$A, v16i8:$B, (i32 0))),
+ (v16i8 (VABSDUB $A, $B))>;
+
+  // Per the PPCISD::VABSD description, the last operand indicates whether
+  // to flip the sign bits before taking the unsigned difference.
+ def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))),
+ (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
+}
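The sign-bit flip works because XVNEGSP negates each single-precision word,
which at the bit level is exactly x ^ 0x80000000, and that xor is an
order-preserving map from signed to unsigned integers. An unsigned absolute
difference of the flipped words therefore equals the signed absolute difference
of the originals, with no overflow. A scalar sketch:

    #include <cstdint>

    uint32_t absd_u(uint32_t a, uint32_t b) { return a > b ? a - b : b - a; }

    // VABSDUW(XVNEGSP a, XVNEGSP b) on one word: signed |a - b| computed
    // with the unsigned-difference instruction.
    uint32_t absd_s(int32_t a, int32_t b) {
      uint32_t fa = (uint32_t)a ^ 0x80000000u;
      uint32_t fb = (uint32_t)b ^ 0x80000000u;
      return absd_u(fa, fb);
    }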
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td b/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td
new file mode 100644
index 000000000000..d2a09f30c0f3
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td
@@ -0,0 +1,19 @@
+//===-- PPCPfmCounters.td - PPC Hardware Counters ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the available hardware counters for PPC.
+//
+//===----------------------------------------------------------------------===//
+
+def CpuCyclesPfmCounter : PfmCounter<"CYCLES">;
+
+def DefaultPfmCounters : ProcPfmCounters {
+ let CycleCounter = CpuCyclesPfmCounter;
+}
+def : PfmCountersDefaultBinding<DefaultPfmCounters>;
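This new file gives llvm-exegesis a cycle counter on every PPC CPU model via
the default binding; per-CPU event lists could later be attached the way the
X86 backend does, e.g. a hypothetical def : PfmCountersBinding<"pwr9",
P9PfmCounters>; (the pwr9 binding named here is illustrative only and is not
part of this change).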
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 1892d1e3dc26..4458b92ceb5e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -33,6 +34,8 @@ STATISTIC(NumRRConvertedInPreEmit,
"Number of r+r instructions converted to r+i in pre-emit peephole");
STATISTIC(NumRemovedInPreEmit,
"Number of instructions deleted in pre-emit peephole");
+STATISTIC(NumberOfSelfCopies,
+ "Number of self copy instructions eliminated");
static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
@@ -60,9 +63,32 @@ namespace {
return false;
bool Changed = false;
const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
SmallVector<MachineInstr *, 4> InstrsToErase;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
+ unsigned Opc = MI.getOpcode();
+        // Detect self copies - these can result from running the aggressive
+        // anti-dependence breaker (AADB).
+ if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
+ const MCInstrDesc &MCID = TII->get(Opc);
+ if (MCID.getNumOperands() == 3 &&
+ MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
+ MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
+ NumberOfSelfCopies++;
+ LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
+ LLVM_DEBUG(MI.dump());
+ InstrsToErase.push_back(&MI);
+ continue;
+ }
+ else if (MCID.getNumOperands() == 2 &&
+ MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
+ NumberOfSelfCopies++;
+ LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
+ LLVM_DEBUG(MI.dump());
+ InstrsToErase.push_back(&MI);
+ continue;
+ }
+ }
MachineInstr *DefMIToErase = nullptr;
if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
Changed = true;
@@ -74,6 +100,75 @@ namespace {
}
}
}
+
+      // Fold a conditional branch whose CR bit is a known constant because
+      // it was set by CRSET or CRUNSET: the branch is either deleted (never
+      // taken) or converted into an unconditional branch (always taken).
+      // If no other instruction uses the CR bit, the CRSET/CRUNSET is
+      // eliminated as well.
+ auto I = MBB.getFirstInstrTerminator();
+ if (I == MBB.instr_end())
+ continue;
+ MachineInstr *Br = &*I;
+ if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
+ continue;
+ MachineInstr *CRSetMI = nullptr;
+ unsigned CRBit = Br->getOperand(0).getReg();
+ unsigned CRReg = getCRFromCRBit(CRBit);
+ bool SeenUse = false;
+ MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
+ for (It++; It != Er; It++) {
+ if (It->modifiesRegister(CRBit, TRI)) {
+ if ((It->getOpcode() == PPC::CRUNSET ||
+ It->getOpcode() == PPC::CRSET) &&
+ It->getOperand(0).getReg() == CRBit)
+ CRSetMI = &*It;
+ break;
+ }
+ if (It->readsRegister(CRBit, TRI))
+ SeenUse = true;
+ }
+ if (!CRSetMI) continue;
+
+ unsigned CRSetOp = CRSetMI->getOpcode();
+ if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) ||
+ (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) {
+ // Remove this branch since it cannot be taken.
+ InstrsToErase.push_back(Br);
+ MBB.removeSuccessor(Br->getOperand(1).getMBB());
+ }
+ else {
+ // This conditional branch is always taken. So, remove all branches
+ // and insert an unconditional branch to the destination of this.
+ MachineBasicBlock::iterator It = Br, Er = MBB.end();
+ for (; It != Er; It++) {
+ if (It->isDebugInstr()) continue;
+ assert(It->isTerminator() && "Non-terminator after a terminator");
+ InstrsToErase.push_back(&*It);
+ }
+ if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) {
+ ArrayRef<MachineOperand> NoCond;
+ TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr,
+ NoCond, Br->getDebugLoc());
+ }
+ for (auto &Succ : MBB.successors())
+ if (Succ != Br->getOperand(1).getMBB()) {
+ MBB.removeSuccessor(Succ);
+ break;
+ }
+ }
+
+      // If the CRBit is not used by any other instruction, we can eliminate
+      // the CRSET/CRUNSET instruction as well.
+ if (!SeenUse) {
+ // We need to check use of the CRBit in successors.
+ for (auto &SuccMBB : MBB.successors())
+ if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) {
+ SeenUse = true;
+ break;
+ }
+ if (!SeenUse)
+ InstrsToErase.push_back(CRSetMI);
+ }
}
for (MachineInstr *MI : InstrsToErase) {
LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 96923a97a82c..3d067aa8e621 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -673,12 +673,15 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
unsigned SrcReg = MI.getOperand(0).getReg();
- BuildMI(MBB, II, dl, TII.get(TargetOpcode::KILL),
- getCRFromCRBit(SrcReg))
- .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
-
+ // We need to move the CR field that contains the CR bit we are spilling.
+ // The super register may not be explicitly defined (i.e. it can be defined
+ // by a CR-logical that only defines the subreg) so we state that the CR
+ // field is undef. Also, in order to preserve the kill flag on the CR bit,
+ // we add it as an implicit use.
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
- .addReg(getCRFromCRBit(SrcReg));
+ .addReg(getCRFromCRBit(SrcReg), RegState::Undef)
+ .addReg(SrcReg,
+ RegState::Implicit | getKillRegState(MI.getOperand(0).isKill()));
// If the saved register wasn't CR0LT, shift the bits left so that the bit to
// store is the first one. Mask all but that bit.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 91a98ee4efc7..e93fe4ce3453 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -85,8 +85,6 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
bool isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
/// We require the register scavenger.
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
return true;
@@ -141,6 +139,23 @@ public:
// Base pointer (stack realignment) support.
unsigned getBaseRegister(const MachineFunction &MF) const;
bool hasBasePointer(const MachineFunction &MF) const;
+
+ /// stripRegisterPrefix - This method strips the character prefix from a
+  /// register name so that only the number is left. Used for Linux asm.
+ static const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'q': // for QPX
+ case 'v':
+ if (RegName[1] == 's')
+ return RegName + 2;
+ return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+ }
};
} // end namespace llvm
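A quick sketch of the new helper's behavior (assumes PPCRegisterInfo.h is
included; this is illustrative, not a test from the tree):

    #include <cassert>
    #include <cstring>

    int main() {
      assert(std::strcmp(llvm::PPCRegisterInfo::stripRegisterPrefix("r31"), "31") == 0);
      assert(std::strcmp(llvm::PPCRegisterInfo::stripRegisterPrefix("vs52"), "52") == 0);
      assert(std::strcmp(llvm::PPCRegisterInfo::stripRegisterPrefix("cr6"), "6") == 0);
      assert(std::strcmp(llvm::PPCRegisterInfo::stripRegisterPrefix("f2"), "2") == 0);
      return 0;
    }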
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 0e641cf9e00a..d0d29b6d2c7d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -85,6 +85,12 @@ class VSRL<FPR SubReg, string n> : PPCReg<n> {
let SubRegIndices = [sub_64];
}
+// VSXReg - One of the VSX registers in the range vs32-vs63 with numbering
+// and encoding to match.
+class VSXReg<bits<6> num, string n> : PPCReg<n> {
+ let HWEncoding{5-0} = num;
+}
+
// CR - One of the 8 4-bit condition registers
class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
let HWEncoding{2-0} = num;
@@ -148,7 +154,7 @@ foreach Index = 0-31 in {
// Dummy VSX registers; these define the strings "vs32"-"vs63" and are used
// only for asm printing.
foreach Index = 32-63 in {
- def VSX#Index : PPCReg<"vs"#Index>;
+ def VSX#Index : VSXReg<Index, "vs"#Index>;
}
// The representation of r0 when treated as the constant 0.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
index 5ad0a517c117..c8fe7d7eea78 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -42,7 +42,6 @@ def IIC_LdStLoad : InstrItinClass;
def IIC_LdStLoadUpd : InstrItinClass;
def IIC_LdStLoadUpdX : InstrItinClass;
def IIC_LdStStore : InstrItinClass;
-def IIC_LdStStoreUpd : InstrItinClass;
def IIC_LdStDSS : InstrItinClass;
def IIC_LdStICBI : InstrItinClass;
def IIC_LdStLD : InstrItinClass;
@@ -63,8 +62,8 @@ def IIC_LdStSLBIA : InstrItinClass;
def IIC_LdStSLBIE : InstrItinClass;
def IIC_LdStSTD : InstrItinClass;
def IIC_LdStSTDCX : InstrItinClass;
-def IIC_LdStSTDU : InstrItinClass;
-def IIC_LdStSTDUX : InstrItinClass;
+def IIC_LdStSTU : InstrItinClass;
+def IIC_LdStSTUX : InstrItinClass;
def IIC_LdStSTFD : InstrItinClass;
def IIC_LdStSTFDU : InstrItinClass;
def IIC_LdStSTVEBX : InstrItinClass;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
index 2455e5e52de5..646822eedbe0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
@@ -280,13 +280,6 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<2, [P440_LWB]>],
[1, 1, 1],
[NoBypass, P440_GPR_Bypass]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [P440_DISS1, P440_DISS2]>,
- InstrStage<1, [P440_LRACC]>,
- InstrStage<1, [P440_AGEN]>,
- InstrStage<1, [P440_CRD]>,
- InstrStage<2, [P440_LWB]>],
- [2, 1, 1, 1],
- [NoBypass, P440_GPR_Bypass]>,
InstrItinData<IIC_LdStICBI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
InstrStage<1, [P440_LRACC]>,
InstrStage<1, [P440_AGEN]>,
@@ -373,14 +366,14 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<2, [P440_LWB]>],
[4, 1, 1],
[NoBypass, P440_GPR_Bypass]>,
- InstrItinData<IIC_LdStSTDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
InstrStage<1, [P440_LRACC]>,
InstrStage<1, [P440_AGEN]>,
InstrStage<1, [P440_CRD]>,
InstrStage<2, [P440_LWB]>],
[2, 1, 1, 1],
[NoBypass, P440_GPR_Bypass]>,
- InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
InstrStage<1, [P440_LRACC]>,
InstrStage<1, [P440_AGEN]>,
InstrStage<1, [P440_CRD]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
index 54cfae5d74b7..f34c1accc0fd 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
@@ -81,8 +81,6 @@ def PPCA2Itineraries : ProcessorItineraries<
[6, 0, 0]>,
InstrItinData<IIC_LdStStore, [InstrStage<1, [A2_XU]>],
[0, 0, 0]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [A2_XU]>],
- [2, 0, 0, 0]>,
InstrItinData<IIC_LdStICBI, [InstrStage<1, [A2_XU]>],
[16, 0, 0]>,
InstrItinData<IIC_LdStSTFD, [InstrStage<1, [A2_XU]>],
@@ -105,9 +103,9 @@ def PPCA2Itineraries : ProcessorItineraries<
[82, 0, 0]>, // L2 latency
InstrItinData<IIC_LdStSTD, [InstrStage<1, [A2_XU]>],
[0, 0, 0]>,
- InstrItinData<IIC_LdStSTDU, [InstrStage<1, [A2_XU]>],
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [A2_XU]>],
[2, 0, 0, 0]>,
- InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [A2_XU]>],
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [A2_XU]>],
[2, 0, 0, 0]>,
InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [A2_XU]>],
[82, 0, 0]>, // L2 latency
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td
index d7c2bd15a258..479a970b2537 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td
@@ -144,7 +144,13 @@ def PPCE500Itineraries : ProcessorItineraries<
InstrStage<1, [E500_LSU_0]>],
[6, 1], // Latency = 3
[NoBypass, E500_GPR_Bypass]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SU0, E500_SU1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
InstrStage<1, [E500_SU0, E500_SU1], 0>,
InstrStage<1, [E500_LSU_0]>],
[6, 1], // Latency = 3
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
index 5f95f2a79f66..d8bda073833f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -157,7 +157,13 @@ def PPCE500mcItineraries : ProcessorItineraries<
InstrStage<1, [E500mc_LSU_0]>],
[6, 1], // Latency = 3
[NoBypass, E500mc_GPR_Bypass]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>,
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>,
+ InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>,
+ InstrStage<1, [E500mc_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500mc_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>,
InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>,
InstrStage<1, [E500mc_LSU_0]>],
[6, 1], // Latency = 3
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
index 32f8e652dd56..3e50803955c4 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -206,12 +206,6 @@ def PPCE5500Itineraries : ProcessorItineraries<
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
[NoBypass, E5500_GPR_Bypass]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
- InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
- InstrStage<1, [E5500_LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, E5500_GPR_Bypass],
- 2>, // 2 micro-ops
InstrItinData<IIC_LdStICBI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
@@ -281,13 +275,13 @@ def PPCE5500Itineraries : ProcessorItineraries<
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
[NoBypass, E5500_GPR_Bypass]>,
- InstrItinData<IIC_LdStSTDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
[NoBypass, E5500_GPR_Bypass],
2>, // 2 micro-ops
- InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
index 21efd8f8f6c9..0995b7200d93 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
@@ -43,7 +43,8 @@ def G3Itineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G3_SLU]>]>,
InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G3_SLU]>]>,
InstrItinData<IIC_LdStStore , [InstrStage<2, [G3_SLU]>]>,
- InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<2, [G3_SLU]>]>,
InstrItinData<IIC_LdStICBI , [InstrStage<3, [G3_SLU]>]>,
InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G3_SLU]>]>,
InstrItinData<IIC_LdStSTFDU , [InstrStage<2, [G3_SLU]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
index 340773ef7876..1b15c7b3c7ad 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
@@ -48,7 +48,8 @@ def G4Itineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStStore , [InstrStage<2, [G4_SLU]>]>,
- InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStDSS , [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStICBI , [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G4_SLU]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
index 1d9f13fcb850..0044c3c6a449 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -56,7 +56,6 @@ def G4PlusItineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStStore , [InstrStage<3, [G4P_SLU]>]>,
- InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStDSS , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStICBI , [InstrStage<3, [G4P_IU2]>]>,
InstrItinData<IIC_LdStSTFD , [InstrStage<3, [G4P_SLU]>]>,
@@ -73,8 +72,8 @@ def G4PlusItineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSTD , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSTDCX , [InstrStage<3, [G4P_SLU]>]>,
- InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G4P_SLU]>]>,
- InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSTVEBX , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSTWCX , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSync , [InstrStage<35, [G4P_SLU]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
index b5a9f96d45ae..c802b80170fb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
@@ -54,7 +54,6 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G5_SLU]>]>,
InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G5_SLU]>]>,
InstrItinData<IIC_LdStStore , [InstrStage<3, [G5_SLU]>]>,
- InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G5_SLU]>]>,
InstrItinData<IIC_LdStDSS , [InstrStage<10, [G5_SLU]>]>,
InstrItinData<IIC_LdStICBI , [InstrStage<40, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTFD , [InstrStage<4, [G5_SLU]>]>,
@@ -76,8 +75,8 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<IIC_LdStSLBIA , [InstrStage<40, [G5_SLU]>]>, // needs work
InstrItinData<IIC_LdStSLBIE , [InstrStage<2, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTD , [InstrStage<3, [G5_SLU]>]>,
- InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G5_SLU]>]>,
- InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<3, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTDCX , [InstrStage<11, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTVEBX , [InstrStage<5, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTWCX , [InstrStage<11, [G5_SLU]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
index a8678f56900e..1d6e509819da 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
@@ -114,6 +114,10 @@ def P7Itineraries : ProcessorItineraries<
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[4, 1, 1]>,
+ InstrItinData<IIC_IntMulHD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 1, 1]>,
InstrItinData<IIC_IntMulLI , [InstrStage<1, [P7_DU1, P7_DU2,
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
@@ -126,6 +130,10 @@ def P7Itineraries : ProcessorItineraries<
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[1, 1, 1]>,
+ InstrItinData<IIC_IntRotateDI , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
InstrItinData<IIC_IntShift , [InstrStage<1, [P7_DU1, P7_DU2,
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
@@ -253,13 +261,13 @@ def P7Itineraries : ProcessorItineraries<
InstrStage<1, [P7_LS1, P7_LS2], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[1, 1, 1]>,
- InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<1, [P7_DU1], 0>,
InstrStage<1, [P7_DU2], 0>,
InstrStage<1, [P7_LS1, P7_LS2], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[2, 1, 1, 1]>,
- InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<1, [P7_DU1], 0>,
InstrStage<1, [P7_DU2], 0>,
InstrStage<1, [P7_DU3], 0>,
InstrStage<1, [P7_DU4], 0>,
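P7 (and P8 below) gains dedicated itinerary entries for two 64-bit integer classes, IIC_IntMulHD and IIC_IntRotateDI, with the same operand latencies as the adjacent word-sized classes. Behaviorally, these cover the multiply-high-doubleword family (mulhd/mulhdu) and rotate-doubleword-by-immediate (e.g. rldicl). A sketch of the operations themselves, using GCC/Clang's nonstandard __int128; the helper names are mine:

    #include <cstdint>
    #include <cstdio>

    // High 64 bits of a 64x64 -> 128-bit unsigned product, as mulhdu computes.
    static uint64_t mulhdu(uint64_t A, uint64_t B) {
      return (uint64_t)(((unsigned __int128)A * B) >> 64);
    }

    // Rotate left by an immediate, the core of the rldicl family.
    static uint64_t rotldi(uint64_t V, unsigned N) {
      N &= 63;
      return N ? (V << N) | (V >> (64 - N)) : V; // guard avoids a UB shift by 64
    }

    int main() {
      std::printf("%llx %llx\n",
                  (unsigned long long)mulhdu(~0ULL, 3),    // prints 2
                  (unsigned long long)rotldi(1ULL, 63));   // prints 8000000000000000
    }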
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
index 79963dd6a3e9..ff39dfda7016 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
@@ -90,6 +90,10 @@ def P8Itineraries : ProcessorItineraries<
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
[4, 1, 1]>,
+ InstrItinData<IIC_IntMulHD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [4, 1, 1]>,
InstrItinData<IIC_IntMulLI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
@@ -102,6 +106,10 @@ def P8Itineraries : ProcessorItineraries<
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
[1, 1, 1]>,
+ InstrItinData<IIC_IntRotateDI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [1, 1, 1]>,
InstrItinData<IIC_IntShift , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
@@ -259,14 +267,14 @@ def P8Itineraries : ProcessorItineraries<
InstrStage<1, [P8_LU1, P8_LU2,
                                                 P8_LSU1, P8_LSU2]>],
[1, 1, 1]>,
- InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P8_DU1], 0>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<1, [P8_DU1], 0>,
InstrStage<1, [P8_DU2], 0>,
InstrStage<1, [P8_LU1, P8_LU2,
P8_LSU1, P8_LSU2], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
[2, 1, 1, 1]>,
// First+last
- InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P8_DU1], 0>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<1, [P8_DU1], 0>,
InstrStage<1, [P8_DU2], 0>,
InstrStage<1, [P8_DU3], 0>,
InstrStage<1, [P8_DU4], 0>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index e1a480117315..a1e625c855e0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -33,6 +33,12 @@ def P9Model : SchedMachineModel {
// A dispatch group is 6 instructions.
let LoopMicroOpBufferSize = 60;
+ // As iops are dispatched to a slice, they are held in an independent slice
+ // issue queue until all register sources and other dependencies have been
+ // resolved and they can be issued. Each of four execution slices has an
+ // 11-entry iop issue queue.
+ let MicroOpBufferSize = 44;
+
let CompleteModel = 1;
 // Do not support QPX (Quad Processing eXtension) or SPE (Signal Processing
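The new MicroOpBufferSize is just the comment's arithmetic: four execution slices, each with an 11-entry iop issue queue. Restated as a compile-time check (identifier names are mine, not the model's):

    constexpr unsigned NumExecSlices = 4;     // four execution slices on P9
    constexpr unsigned IopQueueEntries = 11;  // issue-queue depth per slice
    static_assert(NumExecSlices * IopQueueEntries == 44,
                  "matches 'let MicroOpBufferSize = 44' above");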
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index a8d7955ef548..580d057602f5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -181,6 +181,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
const TargetOptions &Options) {
+ if (TT.isOSDarwin())
+ report_fatal_error("Darwin is no longer supported for PowerPC");
+
if (Options.MCOptions.getABIName().startswith("elfv1"))
return PPCTargetMachine::PPC_ABI_ELFv1;
else if (Options.MCOptions.getABIName().startswith("elfv2"))
@@ -211,19 +214,24 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
if (TT.isOSDarwin())
return Reloc::DynamicNoPIC;
- // Non-darwin 64-bit platforms are PIC by default.
- if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le)
+ // Big Endian PPC is PIC by default.
+ if (TT.getArch() == Triple::ppc64)
return Reloc::PIC_;
- // 32-bit is static by default.
+ // Rest are static by default.
return Reloc::Static;
}
-static CodeModel::Model getEffectiveCodeModel(const Triple &TT,
- Optional<CodeModel::Model> CM,
- bool JIT) {
- if (CM)
+static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT,
+ Optional<CodeModel::Model> CM,
+ bool JIT) {
+ if (CM) {
+ if (*CM == CodeModel::Tiny)
+ report_fatal_error("Target does not support the tiny CodeModel");
+ if (*CM == CodeModel::Kernel)
+ report_fatal_error("Target does not support the kernel CodeModel");
return *CM;
+ }
if (!TT.isOSDarwin() && !JIT &&
(TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le))
return CodeModel::Medium;
@@ -243,7 +251,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
computeFSAdditions(FS, OL, TT), Options,
getEffectiveRelocModel(TT, RM),
- getEffectiveCodeModel(TT, CM, JIT), OL),
+ getEffectivePPCCodeModel(TT, CM, JIT), OL),
TLOF(createTLOF(getTargetTriple())),
TargetABI(computeTargetABI(TT, Options)) {
initAsmInfo();
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index b0da9b5a6d70..bc9bcab83a0a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -473,7 +473,14 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
+ if (UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
+
assert(isa<VectorType>(VecTy) &&
"Expect a vector type for interleaved memory op");
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 2ee2b3eb8084..9221a910288a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -90,7 +90,9 @@ public:
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
/// @}
};