aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Target/PowerPC
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC')
-rw-r--r--contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp315
-rw-r--r--contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp87
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp86
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h11
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp12
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp43
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h7
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp132
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h46
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp47
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.h16
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.td121
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp651
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp26
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td47
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp202
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp110
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp157
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h50
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp387
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp2723
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h190
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td148
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td248
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td264
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td172
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp802
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td391
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td1192
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td177
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp232
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp390
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp23
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp7
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h7
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp246
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h52
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td71
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule.td398
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp88
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h58
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp170
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp156
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp153
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h23
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp256
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h104
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp176
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp335
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp821
-rw-r--r--contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp2
64 files changed, 9348 insertions, 3332 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index cd36e58b78d3..83de4d996993 100644
--- a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -132,6 +132,35 @@ static const MCPhysReg VSFRegs[64] = {
PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
};
+static const MCPhysReg VSSRegs[64] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+static unsigned QFRegs[32] = {
+ PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
+ PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
+ PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
+ PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
+ PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
+ PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
+ PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
+ PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
+};
static const MCPhysReg CRBITRegs[32] = {
PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
@@ -261,9 +290,9 @@ class PPCAsmParser : public MCTargetAsmParser {
public:
- PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &_MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), MII(_MII) {
+ PPCAsmParser(MCSubtargetInfo &STI, MCAsmParser &, const MCInstrInfo &MII,
+ const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI), MII(MII) {
// Check for 64-bit vs. 32-bit pointer mode.
Triple TheTriple(STI.getTargetTriple());
IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
@@ -415,7 +444,9 @@ public:
bool isToken() const override { return Kind == Token; }
bool isImm() const override { return Kind == Immediate || Kind == Expression; }
+ bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); }
bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
+ bool isU3Imm() const { return Kind == Immediate && isUInt<3>(getImm()); }
bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); }
bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
@@ -429,6 +460,9 @@ public:
bool isU8ImmX8() const { return Kind == Immediate &&
isUInt<8>(getImm()) &&
(getImm() & 7) == 0; }
+
+ bool isU10Imm() const { return Kind == Immediate && isUInt<10>(getImm()); }
+ bool isU12Imm() const { return Kind == Immediate && isUInt<12>(getImm()); }
bool isU16Imm() const {
switch (Kind) {
case Expression:
@@ -507,22 +541,22 @@ public:
void addRegGPRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(RRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(RRegs[getReg()]));
}
void addRegGPRCNoR0Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(RRegsNoR0[getReg()]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[getReg()]));
}
void addRegG8RCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(XRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(XRegs[getReg()]));
}
void addRegG8RCNoX0Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(XRegsNoX0[getReg()]));
+ Inst.addOperand(MCOperand::createReg(XRegsNoX0[getReg()]));
}
void addRegGxRCOperands(MCInst &Inst, unsigned N) const {
@@ -541,63 +575,83 @@ public:
void addRegF4RCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(FRegs[getReg()]));
}
void addRegF8RCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(FRegs[getReg()]));
}
void addRegVRRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(VRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(VRegs[getReg()]));
}
void addRegVSRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(VSRegs[getVSReg()]));
+ Inst.addOperand(MCOperand::createReg(VSRegs[getVSReg()]));
}
void addRegVSFRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(VSFRegs[getVSReg()]));
+ Inst.addOperand(MCOperand::createReg(VSFRegs[getVSReg()]));
+ }
+
+ void addRegVSSRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(VSSRegs[getVSReg()]));
+ }
+
+ void addRegQFRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
+ }
+
+ void addRegQSRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
+ }
+
+ void addRegQBRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
}
void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()]));
+ Inst.addOperand(MCOperand::createReg(CRBITRegs[getCRBit()]));
}
void addRegCRRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(CRRegs[getCCReg()]));
+ Inst.addOperand(MCOperand::createReg(CRRegs[getCCReg()]));
}
void addCRBitMaskOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(CRRegs[getCRBitMask()]));
+ Inst.addOperand(MCOperand::createReg(CRRegs[getCRBitMask()]));
}
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
if (Kind == Immediate)
- Inst.addOperand(MCOperand::CreateImm(getImm()));
+ Inst.addOperand(MCOperand::createImm(getImm()));
else
- Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ Inst.addOperand(MCOperand::createExpr(getExpr()));
}
void addS16ImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
switch (Kind) {
case Immediate:
- Inst.addOperand(MCOperand::CreateImm(getImm()));
+ Inst.addOperand(MCOperand::createImm(getImm()));
break;
case ContextImmediate:
- Inst.addOperand(MCOperand::CreateImm(getImmS16Context()));
+ Inst.addOperand(MCOperand::createImm(getImmS16Context()));
break;
default:
- Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ Inst.addOperand(MCOperand::createExpr(getExpr()));
break;
}
}
@@ -606,13 +660,13 @@ public:
assert(N == 1 && "Invalid number of operands!");
switch (Kind) {
case Immediate:
- Inst.addOperand(MCOperand::CreateImm(getImm()));
+ Inst.addOperand(MCOperand::createImm(getImm()));
break;
case ContextImmediate:
- Inst.addOperand(MCOperand::CreateImm(getImmU16Context()));
+ Inst.addOperand(MCOperand::createImm(getImmU16Context()));
break;
default:
- Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ Inst.addOperand(MCOperand::createExpr(getExpr()));
break;
}
}
@@ -620,14 +674,14 @@ public:
void addBranchTargetOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
if (Kind == Immediate)
- Inst.addOperand(MCOperand::CreateImm(getImm() / 4));
+ Inst.addOperand(MCOperand::createImm(getImm() / 4));
else
- Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ Inst.addOperand(MCOperand::createExpr(getExpr()));
}
void addTLSRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateExpr(getTLSReg()));
+ Inst.addOperand(MCOperand::createExpr(getTLSReg()));
}
StringRef getToken() const {
@@ -738,10 +792,10 @@ void PPCOperand::print(raw_ostream &OS) const {
OS << getImm();
break;
case Expression:
- getExpr()->print(OS);
+ OS << *getExpr();
break;
case TLSRegister:
- getTLSReg()->print(OS);
+ OS << *getTLSReg();
break;
}
}
@@ -749,30 +803,64 @@ void PPCOperand::print(raw_ostream &OS) const {
static void
addNegOperand(MCInst &Inst, MCOperand &Op, MCContext &Ctx) {
if (Op.isImm()) {
- Inst.addOperand(MCOperand::CreateImm(-Op.getImm()));
+ Inst.addOperand(MCOperand::createImm(-Op.getImm()));
return;
}
const MCExpr *Expr = Op.getExpr();
if (const MCUnaryExpr *UnExpr = dyn_cast<MCUnaryExpr>(Expr)) {
if (UnExpr->getOpcode() == MCUnaryExpr::Minus) {
- Inst.addOperand(MCOperand::CreateExpr(UnExpr->getSubExpr()));
+ Inst.addOperand(MCOperand::createExpr(UnExpr->getSubExpr()));
return;
}
} else if (const MCBinaryExpr *BinExpr = dyn_cast<MCBinaryExpr>(Expr)) {
if (BinExpr->getOpcode() == MCBinaryExpr::Sub) {
const MCExpr *NE = MCBinaryExpr::CreateSub(BinExpr->getRHS(),
BinExpr->getLHS(), Ctx);
- Inst.addOperand(MCOperand::CreateExpr(NE));
+ Inst.addOperand(MCOperand::createExpr(NE));
return;
}
}
- Inst.addOperand(MCOperand::CreateExpr(MCUnaryExpr::CreateMinus(Expr, Ctx)));
+ Inst.addOperand(MCOperand::createExpr(MCUnaryExpr::CreateMinus(Expr, Ctx)));
}
void PPCAsmParser::ProcessInstruction(MCInst &Inst,
const OperandVector &Operands) {
int Opcode = Inst.getOpcode();
switch (Opcode) {
+ case PPC::DCBTx:
+ case PPC::DCBTT:
+ case PPC::DCBTSTx:
+ case PPC::DCBTSTT: {
+ MCInst TmpInst;
+ TmpInst.setOpcode((Opcode == PPC::DCBTx || Opcode == PPC::DCBTT) ?
+ PPC::DCBT : PPC::DCBTST);
+ TmpInst.addOperand(MCOperand::createImm(
+ (Opcode == PPC::DCBTx || Opcode == PPC::DCBTSTx) ? 0 : 16));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::DCBTCT:
+ case PPC::DCBTDS: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(PPC::DCBT);
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::DCBTSTCT:
+ case PPC::DCBTSTDS: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(PPC::DCBTST);
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ Inst = TmpInst;
+ break;
+ }
case PPC::LAx: {
MCInst TmpInst;
TmpInst.setOpcode(PPC::LA);
@@ -826,9 +914,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::EXTLWI? PPC::RLWINM : PPC::RLWINMo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(B));
- TmpInst.addOperand(MCOperand::CreateImm(0));
- TmpInst.addOperand(MCOperand::CreateImm(N - 1));
+ TmpInst.addOperand(MCOperand::createImm(B));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(N - 1));
Inst = TmpInst;
break;
}
@@ -840,9 +928,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::EXTRWI? PPC::RLWINM : PPC::RLWINMo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(B + N));
- TmpInst.addOperand(MCOperand::CreateImm(32 - N));
- TmpInst.addOperand(MCOperand::CreateImm(31));
+ TmpInst.addOperand(MCOperand::createImm(B + N));
+ TmpInst.addOperand(MCOperand::createImm(32 - N));
+ TmpInst.addOperand(MCOperand::createImm(31));
Inst = TmpInst;
break;
}
@@ -855,9 +943,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(32 - B));
- TmpInst.addOperand(MCOperand::CreateImm(B));
- TmpInst.addOperand(MCOperand::CreateImm((B + N) - 1));
+ TmpInst.addOperand(MCOperand::createImm(32 - B));
+ TmpInst.addOperand(MCOperand::createImm(B));
+ TmpInst.addOperand(MCOperand::createImm((B + N) - 1));
Inst = TmpInst;
break;
}
@@ -870,9 +958,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(32 - (B + N)));
- TmpInst.addOperand(MCOperand::CreateImm(B));
- TmpInst.addOperand(MCOperand::CreateImm((B + N) - 1));
+ TmpInst.addOperand(MCOperand::createImm(32 - (B + N)));
+ TmpInst.addOperand(MCOperand::createImm(B));
+ TmpInst.addOperand(MCOperand::createImm((B + N) - 1));
Inst = TmpInst;
break;
}
@@ -883,9 +971,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::ROTRWI? PPC::RLWINM : PPC::RLWINMo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(32 - N));
- TmpInst.addOperand(MCOperand::CreateImm(0));
- TmpInst.addOperand(MCOperand::CreateImm(31));
+ TmpInst.addOperand(MCOperand::createImm(32 - N));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(31));
Inst = TmpInst;
break;
}
@@ -896,9 +984,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::SLWI? PPC::RLWINM : PPC::RLWINMo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(N));
- TmpInst.addOperand(MCOperand::CreateImm(0));
- TmpInst.addOperand(MCOperand::CreateImm(31 - N));
+ TmpInst.addOperand(MCOperand::createImm(N));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(31 - N));
Inst = TmpInst;
break;
}
@@ -909,9 +997,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::SRWI? PPC::RLWINM : PPC::RLWINMo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(32 - N));
- TmpInst.addOperand(MCOperand::CreateImm(N));
- TmpInst.addOperand(MCOperand::CreateImm(31));
+ TmpInst.addOperand(MCOperand::createImm(32 - N));
+ TmpInst.addOperand(MCOperand::createImm(N));
+ TmpInst.addOperand(MCOperand::createImm(31));
Inst = TmpInst;
break;
}
@@ -922,9 +1010,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::CLRRWI? PPC::RLWINM : PPC::RLWINMo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(0));
- TmpInst.addOperand(MCOperand::CreateImm(0));
- TmpInst.addOperand(MCOperand::CreateImm(31 - N));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(31 - N));
Inst = TmpInst;
break;
}
@@ -936,9 +1024,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::CLRLSLWI? PPC::RLWINM : PPC::RLWINMo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(N));
- TmpInst.addOperand(MCOperand::CreateImm(B - N));
- TmpInst.addOperand(MCOperand::CreateImm(31 - N));
+ TmpInst.addOperand(MCOperand::createImm(N));
+ TmpInst.addOperand(MCOperand::createImm(B - N));
+ TmpInst.addOperand(MCOperand::createImm(31 - N));
Inst = TmpInst;
break;
}
@@ -950,8 +1038,8 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::EXTLDI? PPC::RLDICR : PPC::RLDICRo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(B));
- TmpInst.addOperand(MCOperand::CreateImm(N - 1));
+ TmpInst.addOperand(MCOperand::createImm(B));
+ TmpInst.addOperand(MCOperand::createImm(N - 1));
Inst = TmpInst;
break;
}
@@ -963,8 +1051,8 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::EXTRDI? PPC::RLDICL : PPC::RLDICLo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(B + N));
- TmpInst.addOperand(MCOperand::CreateImm(64 - N));
+ TmpInst.addOperand(MCOperand::createImm(B + N));
+ TmpInst.addOperand(MCOperand::createImm(64 - N));
Inst = TmpInst;
break;
}
@@ -977,8 +1065,8 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(64 - (B + N)));
- TmpInst.addOperand(MCOperand::CreateImm(B));
+ TmpInst.addOperand(MCOperand::createImm(64 - (B + N)));
+ TmpInst.addOperand(MCOperand::createImm(B));
Inst = TmpInst;
break;
}
@@ -989,8 +1077,8 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::ROTRDI? PPC::RLDICL : PPC::RLDICLo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(64 - N));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ TmpInst.addOperand(MCOperand::createImm(64 - N));
+ TmpInst.addOperand(MCOperand::createImm(0));
Inst = TmpInst;
break;
}
@@ -1001,8 +1089,8 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::SLDI? PPC::RLDICR : PPC::RLDICRo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(N));
- TmpInst.addOperand(MCOperand::CreateImm(63 - N));
+ TmpInst.addOperand(MCOperand::createImm(N));
+ TmpInst.addOperand(MCOperand::createImm(63 - N));
Inst = TmpInst;
break;
}
@@ -1013,8 +1101,8 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::SRDI? PPC::RLDICL : PPC::RLDICLo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(64 - N));
- TmpInst.addOperand(MCOperand::CreateImm(N));
+ TmpInst.addOperand(MCOperand::createImm(64 - N));
+ TmpInst.addOperand(MCOperand::createImm(N));
Inst = TmpInst;
break;
}
@@ -1025,8 +1113,8 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::CLRRDI? PPC::RLDICR : PPC::RLDICRo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(0));
- TmpInst.addOperand(MCOperand::CreateImm(63 - N));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(63 - N));
Inst = TmpInst;
break;
}
@@ -1038,8 +1126,60 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::CLRLSLDI? PPC::RLDIC : PPC::RLDICo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(N));
- TmpInst.addOperand(MCOperand::CreateImm(B - N));
+ TmpInst.addOperand(MCOperand::createImm(N));
+ TmpInst.addOperand(MCOperand::createImm(B - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::RLWINMbm:
+ case PPC::RLWINMobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::createImm(MB));
+ TmpInst.addOperand(MCOperand::createImm(ME));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::RLWIMIbm:
+ case PPC::RLWIMIobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0)); // The tied operand.
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::createImm(MB));
+ TmpInst.addOperand(MCOperand::createImm(ME));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::RLWNMbm:
+ case PPC::RLWNMobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? PPC::RLWNM : PPC::RLWNMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::createImm(MB));
+ TmpInst.addOperand(MCOperand::createImm(ME));
Inst = TmpInst;
break;
}
@@ -1105,10 +1245,18 @@ MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) {
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = FRegs[IntVal];
return false;
+ } else if (Name.startswith_lower("vs") &&
+ !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 64) {
+ RegNo = VSRegs[IntVal];
+ return false;
} else if (Name.startswith_lower("v") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = VRegs[IntVal];
return false;
+ } else if (Name.startswith_lower("q") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = QFRegs[IntVal];
+ return false;
} else if (Name.startswith_lower("cr") &&
!Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
RegNo = CRRegs[IntVal];
@@ -1526,6 +1674,21 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return true;
}
+ // We'll now deal with an unfortunate special case: the syntax for the dcbt
+ // and dcbtst instructions differs for server vs. embedded cores.
+ // The syntax for dcbt is:
+ // dcbt ra, rb, th [server]
+ // dcbt th, ra, rb [embedded]
+ // where th can be omitted when it is 0. dcbtst is the same. We take the
+ // server form to be the default, so swap the operands if we're parsing for
+ // an embedded core (they'll be swapped again upon printing).
+ if (STI.getFeatureBits()[PPC::FeatureBookE] &&
+ Operands.size() == 4 &&
+ (Name == "dcbt" || Name == "dcbtst")) {
+ std::swap(Operands[1], Operands[3]);
+ std::swap(Operands[2], Operands[1]);
+ }
+
return false;
}
@@ -1700,7 +1863,7 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
Error(L, "expected identifier in directive");
return false;
}
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
if (getLexer().isNot(AsmToken::Comma)) {
Error(L, "unexpected token in directive");
diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 5251b60f3480..4799ea27c4b4 100644
--- a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -25,7 +25,7 @@ class PPCDisassembler : public MCDisassembler {
public:
PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- virtual ~PPCDisassembler() {}
+ ~PPCDisassembler() override {}
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -131,6 +131,26 @@ static const unsigned VSFRegs[] = {
PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
};
+static const unsigned VSSRegs[] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+
static const unsigned GPRegs[] = {
PPC::R0, PPC::R1, PPC::R2, PPC::R3,
PPC::R4, PPC::R5, PPC::R6, PPC::R7,
@@ -164,11 +184,22 @@ static const unsigned G8Regs[] = {
PPC::X28, PPC::X29, PPC::X30, PPC::X31
};
+static const unsigned QFRegs[] = {
+ PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
+ PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
+ PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
+ PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
+ PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
+ PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
+ PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
+ PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
+};
+
template <std::size_t N>
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
const unsigned (&Regs)[N]) {
assert(RegNo < N && "Invalid register number");
- Inst.addOperand(MCOperand::CreateReg(Regs[RegNo]));
+ Inst.addOperand(MCOperand::createReg(Regs[RegNo]));
return MCDisassembler::Success;
}
@@ -178,6 +209,12 @@ static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, CRRegs);
}
+static DecodeStatus DecodeCRRC0RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, CRRegs);
+}
+
static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
@@ -214,6 +251,12 @@ static DecodeStatus DecodeVSFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, VSFRegs);
}
+static DecodeStatus DecodeVSSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VSSRegs);
+}
+
static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
@@ -235,11 +278,20 @@ static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
+static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, QFRegs);
+}
+
+#define DecodeQSRCRegisterClass DecodeQFRCRegisterClass
+#define DecodeQBRCRegisterClass DecodeQFRCRegisterClass
+
template<unsigned N>
static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
int64_t Address, const void *Decoder) {
assert(isUInt<N>(Imm) && "Invalid immediate");
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ Inst.addOperand(MCOperand::createImm(Imm));
return MCDisassembler::Success;
}
@@ -247,7 +299,7 @@ template<unsigned N>
static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
int64_t Address, const void *Decoder) {
assert(isUInt<N>(Imm) && "Invalid immediate");
- Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm)));
+ Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm)));
return MCDisassembler::Success;
}
@@ -270,19 +322,19 @@ static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm,
case PPC::LFSU:
case PPC::LFDU:
// Add the tied output operand.
- Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
break;
case PPC::STBU:
case PPC::STHU:
case PPC::STWU:
case PPC::STFSU:
case PPC::STFDU:
- Inst.insert(Inst.begin(), MCOperand::CreateReg(GP0Regs[Base]));
+ Inst.insert(Inst.begin(), MCOperand::createReg(GP0Regs[Base]));
break;
}
- Inst.addOperand(MCOperand::CreateImm(SignExtend64<16>(Disp)));
- Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp)));
+ Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
return MCDisassembler::Success;
}
@@ -298,12 +350,12 @@ static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm,
if (Inst.getOpcode() == PPC::LDU)
// Add the tied output operand.
- Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
else if (Inst.getOpcode() == PPC::STDU)
- Inst.insert(Inst.begin(), MCOperand::CreateReg(GP0Regs[Base]));
+ Inst.insert(Inst.begin(), MCOperand::createReg(GP0Regs[Base]));
- Inst.addOperand(MCOperand::CreateImm(SignExtend64<16>(Disp << 2)));
- Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 2)));
+ Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
return MCDisassembler::Success;
}
@@ -314,7 +366,7 @@ static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm,
unsigned Zeros = countTrailingZeros(Imm);
assert(Zeros < 8 && "Invalid CR bit value");
- Inst.addOperand(MCOperand::CreateReg(CRRegs[7 - Zeros]));
+ Inst.addOperand(MCOperand::createReg(CRRegs[7 - Zeros]));
return MCDisassembler::Success;
}
@@ -335,6 +387,15 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
uint32_t Inst =
(Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 8) | (Bytes[3] << 0);
+ if (STI.getFeatureBits()[PPC::FeatureQPX]) {
+ DecodeStatus result =
+ decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI);
+ if (result != MCDisassembler::Fail)
+ return result;
+
+ MI.clear();
+ }
+
return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 670c40a2a3bd..1a130e87bf3e 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -17,6 +17,8 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -31,14 +33,28 @@ static cl::opt<bool>
FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false),
cl::desc("Use full register names when printing assembly"));
+#define PRINT_ALIAS_INSTR
#include "PPCGenAsmWriter.inc"
void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << getRegisterName(RegNo);
+ const char *RegName = getRegisterName(RegNo);
+ if (RegName[0] == 'q' /* QPX */) {
+ // The system toolchain on the BG/Q does not understand QPX register names
+ // in .cfi_* directives, so print the name of the floating-point
+ // subregister instead.
+ std::string RN(RegName);
+
+ RN[0] = 'f';
+ OS << RN;
+
+ return;
+ }
+
+ OS << RegName;
}
void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
// Check for slwi/srwi mnemonics.
if (MI->getOpcode() == PPC::RLWINM) {
unsigned char SH = MI->getOperand(2).getImm();
@@ -87,6 +103,38 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
}
+
+ // dcbt[st] is printed manually here because:
+ // 1. The assembly syntax is different between embedded and server targets
+ // 2. We must print the short mnemonics for TH == 0 because the
+ // embedded/server syntax default will not be stable across assemblers
+ // The syntax for dcbt is:
+ // dcbt ra, rb, th [server]
+ // dcbt th, ra, rb [embedded]
+ // where th can be omitted when it is 0. dcbtst is the same.
+ if (MI->getOpcode() == PPC::DCBT || MI->getOpcode() == PPC::DCBTST) {
+ unsigned char TH = MI->getOperand(0).getImm();
+ O << "\tdcbt";
+ if (MI->getOpcode() == PPC::DCBTST)
+ O << "st";
+ if (TH == 16)
+ O << "t";
+ O << " ";
+
+ bool IsBookE = STI.getFeatureBits()[PPC::FeatureBookE];
+ if (IsBookE && TH != 0 && TH != 16)
+ O << (unsigned int) TH << ", ";
+
+ printOperand(MI, 1, O);
+ O << ", ";
+ printOperand(MI, 2, O);
+
+ if (!IsBookE && TH != 0 && TH != 16)
+ O << ", " << (unsigned int) TH;
+
+ printAnnotation(O, Annot);
+ return;
+ }
// For fast-isel, a COPY_TO_REGCLASS may survive this long. This is
// used when converting a 32-bit float to a 64-bit float as part of
@@ -98,8 +146,9 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// precision). FIXME: Is there a better solution?
if (MI->getOpcode() == TargetOpcode::COPY_TO_REGCLASS)
return;
-
- printInstruction(MI, O);
+
+ if (!printAliasInstr(MI, O))
+ printInstruction(MI, O);
printAnnotation(O, Annot);
}
@@ -201,6 +250,13 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo+1, O);
}
+void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 1 && "Invalid u1imm argument!");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
@@ -208,6 +264,13 @@ void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
O << (unsigned int)Value;
}
+void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 8 && "Invalid u3imm argument!");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
@@ -236,6 +299,20 @@ void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
O << (unsigned int)Value;
}
+void PPCInstPrinter::printU10ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned short Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 1023 && "Invalid u10imm argument!");
+ O << (unsigned short)Value;
+}
+
+void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned short Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 4095 && "Invalid u12imm argument!");
+ O << (unsigned short)Value;
+}
+
void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
if (MI->getOperand(OpNo).isImm())
@@ -338,6 +415,7 @@ static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) {
case 'r':
case 'f':
+ case 'q': // for QPX
case 'v':
if (RegName[1] == 's')
return RegName + 2;
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index b21aa22daa1c..8e1878344302 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -32,22 +32,31 @@ public:
}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
+ bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+ void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+ unsigned PrintMethodIdx,
+ raw_ostream &OS);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier = nullptr);
+ void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU3ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU10ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU12ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index bea88a2ff2c6..86885e111dd1 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -178,12 +178,7 @@ public:
for (uint64_t i = 0; i != NumNops; ++i)
OW->Write32(0x60000000);
- switch (Count % 4) {
- default: break; // No leftover bytes to write
- case 1: OW->Write8(0); break;
- case 2: OW->Write16(0); break;
- case 3: OW->Write16(0); OW->Write8(0); break;
- }
+ OW->WriteZeros(Count % 4);
return true;
}
@@ -208,7 +203,7 @@ namespace {
public:
DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { }
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
bool is64 = getPointerSize() == 8;
return createPPCMachObjectWriter(
OS,
@@ -224,8 +219,7 @@ namespace {
ELFPPCAsmBackend(const Target &T, bool IsLittleEndian, uint8_t OSABI) :
PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { }
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
bool is64 = getPointerSize() == 8;
return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index b817394e52c6..3e3489fc46aa 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -412,7 +412,7 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
}
}
-MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createPPCELFObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit,
bool IsLittleEndian,
uint8_t OSABI) {
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 2b4f2d81db85..d8fab5b7c01a 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -45,6 +45,10 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
void PPCELFMCAsmInfo::anchor() { }
PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
+ // FIXME: This is not always needed. For example, it is not needed in the
+ // v2 abi.
+ NeedsLocalForSize = true;
+
if (is64Bit) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 786b7fe35d66..17f4cd421641 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -14,11 +14,13 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -31,19 +33,19 @@ STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
namespace {
class PPCMCCodeEmitter : public MCCodeEmitter {
- PPCMCCodeEmitter(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
- void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete;
+ void operator=(const PPCMCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
const MCContext &CTX;
bool IsLittleEndian;
public:
- PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool isLittle)
- : MCII(mcii), CTX(ctx), IsLittleEndian(isLittle) {
- }
-
- ~PPCMCCodeEmitter() {}
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), CTX(ctx),
+ IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
+
+ ~PPCMCCodeEmitter() override {}
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
@@ -96,7 +98,7 @@ public:
uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override {
// For fast-isel, a float COPY_TO_REGCLASS can survive this long.
@@ -158,14 +160,11 @@ public:
};
} // end anonymous namespace
-
+
MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
- Triple TT(STI.getTargetTriple());
- bool IsLittleEndian = TT.getArch() == Triple::ppc64le;
- return new PPCMCCodeEmitter(MCII, Ctx, IsLittleEndian);
+ return new PPCMCCodeEmitter(MCII, Ctx);
}
unsigned PPCMCCodeEmitter::
@@ -176,7 +175,7 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_br24));
return 0;
}
@@ -188,7 +187,7 @@ unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_brcond14));
return 0;
}
@@ -201,7 +200,7 @@ getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_br24abs));
return 0;
}
@@ -214,7 +213,7 @@ getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_brcond14abs));
return 0;
}
@@ -226,7 +225,7 @@ unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the immediate field.
- Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16));
return 0;
}
@@ -244,7 +243,7 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
return (getMachineOpValue(MI, MO, Fixups, STI) & 0xFFFF) | RegBits;
// Add a fixup for the displacement field.
- Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16));
return RegBits;
}
@@ -263,7 +262,7 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
return ((getMachineOpValue(MI, MO, Fixups, STI) >> 2) & 0x3FFF) | RegBits;
// Add a fixup for the displacement field.
- Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16ds));
return RegBits;
}
@@ -326,7 +325,7 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
// Add a fixup for the TLS register, which simply provides a relocation
// hint to the linker that this statement is part of a relocation sequence.
// Return the thread-pointer register's encoding.
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_nofixup));
Triple TT(STI.getTargetTriple());
bool isPPC64 = TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le;
@@ -340,7 +339,7 @@ unsigned PPCMCCodeEmitter::getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
// (__tls_get_addr), which we create via getDirectBrEncoding as usual,
// and one for the TLSGD or TLSLD symbol, which is emitted here.
const MCOperand &MO = MI.getOperand(OpNo+1);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_nofixup));
return getDirectBrEncoding(MI, OpNo, Fixups, STI);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index f0a6bb9b5f4b..ca72ccf0f76e 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -36,9 +36,8 @@ private:
int64_t EvaluateAsInt64(int64_t Value) const;
- explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr,
- bool _IsDarwin)
- : Kind(_Kind), Expr(_Expr), IsDarwin(_IsDarwin) {}
+ explicit PPCMCExpr(VariantKind Kind, const MCExpr *Expr, bool IsDarwin)
+ : Kind(Kind), Expr(Expr), IsDarwin(IsDarwin) {}
public:
/// @name Construction
@@ -83,7 +82,7 @@ public:
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
- const MCSection *FindAssociatedSection() const override {
+ MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index f2da38961684..847437611a5f 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -108,7 +108,7 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
(T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le))
CM = CodeModel::Medium;
}
- X->InitMCCodeGenInfo(RM, CM, OL);
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
@@ -145,6 +145,7 @@ public:
}
void emitTCEntry(const MCSymbol &S) override {
// Creates a R_PPC64_TOC relocation
+ Streamer.EmitValueToAlignment(8);
Streamer.EmitSymbolValue(&S, 8);
}
void emitMachine(StringRef CPU) override {
@@ -222,99 +223,60 @@ public:
};
}
-// This is duplicated code. Refactor this.
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- if (Triple(TT).isOSDarwin()) {
- MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
- new PPCTargetMachOStreamer(*S);
- return S;
- }
-
- MCStreamer *S = createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
- new PPCTargetELFStreamer(*S);
- return S;
+static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new PPCTargetAsmStreamer(S, OS);
}
-static MCStreamer *
-createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
-
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new PPCTargetAsmStreamer(*S, OS);
- return S;
+static MCTargetStreamer *
+createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ Triple TT(STI.getTargetTriple());
+ if (TT.getObjectFormat() == Triple::ELF)
+ return new PPCTargetELFStreamer(S);
+ return new PPCTargetMachOStreamer(S);
}
-static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
+static MCInstPrinter *createPPCMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- bool isDarwin = Triple(STI.getTargetTriple()).isOSDarwin();
- return new PPCInstPrinter(MAI, MII, MRI, isDarwin);
+ const MCRegisterInfo &MRI) {
+ return new PPCInstPrinter(MAI, MII, MRI, T.isOSDarwin());
}
extern "C" void LLVMInitializePowerPCTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn C(ThePPC32Target, createPPCMCAsmInfo);
- RegisterMCAsmInfoFn D(ThePPC64Target, createPPCMCAsmInfo);
- RegisterMCAsmInfoFn E(ThePPC64LETarget, createPPCMCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(ThePPC32Target, createPPCMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(ThePPC64Target, createPPCMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(ThePPC64LETarget,
- createPPCMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(ThePPC64LETarget,
- createPPCMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(ThePPC32Target, createPPCMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(ThePPC64Target, createPPCMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(ThePPC64LETarget, createPPCMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target,
- createPPCMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target,
- createPPCMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(ThePPC64LETarget,
- createPPCMCSubtargetInfo);
-
- // Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(ThePPC64LETarget,
- createPPCMCCodeEmitter);
-
+ for (Target *T : {&ThePPC32Target, &ThePPC64Target, &ThePPC64LETarget}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn C(*T, createPPCMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createPPCMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createPPCMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createPPCMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createPPCMCSubtargetInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(*T, createPPCMCCodeEmitter);
+
// Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(ThePPC32Target, createPPCAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(ThePPC64Target, createPPCAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(ThePPC64LETarget, createPPCAsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(ThePPC32Target, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(ThePPC64LETarget, createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(ThePPC32Target, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(ThePPC64Target, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(ThePPC64LETarget, createMCAsmStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(ThePPC64LETarget,
- createPPCMCInstPrinter);
+ TargetRegistry::RegisterMCAsmBackend(*T, createPPCAsmBackend);
+
+ // Register the object target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T,
+ createObjectTargetStreamer);
+
+ // Register the asm target streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createPPCMCInstPrinter);
+ }
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 68f7f7aac82d..5f2117c88e46 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -18,6 +18,7 @@
#undef PPC
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
namespace llvm {
class MCAsmBackend;
@@ -29,29 +30,56 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
class StringRef;
+class raw_pwrite_stream;
class raw_ostream;
extern Target ThePPC32Target;
extern Target ThePPC64Target;
extern Target ThePPC64LETarget;
-
+
MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-/// createPPCELFObjectWriter - Construct an PPC ELF object writer.
-MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- bool IsLittleEndian,
- uint8_t OSABI);
-/// createPPCELFObjectWriter - Construct a PPC Mach-O object writer.
-MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
+/// Construct an PPC ELF object writer.
+MCObjectWriter *createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
+ bool IsLittleEndian, uint8_t OSABI);
+/// Construct a PPC Mach-O object writer.
+MCObjectWriter *createPPCMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
uint32_t CPUType,
uint32_t CPUSubtype);
+
+/// Returns true iff Val consists of one contiguous run of 1s with any number of
+/// 0s on either side. The 1s are allowed to wrap from LSB to MSB, so
+/// 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is not,
+/// since all 1s are not contiguous.
+static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+ if (!Val)
+ return false;
+
+ if (isShiftedMask_32(Val)) {
+ // look for the first non-zero bit
+ MB = countLeadingZeros(Val);
+ // look for the first zero bit after the run of ones
+ ME = countLeadingZeros((Val - 1) ^ Val);
+ return true;
+ } else {
+ Val = ~Val; // invert mask
+ if (isShiftedMask_32(Val)) {
+ // effectively look for the first zero bit
+ ME = countLeadingZeros(Val) - 1;
+ // effectively look for the first one bit after the run of zeros
+ MB = countLeadingZeros((Val - 1) ^ Val) + 1;
+ return true;
+ }
+ }
+ // no run present
+ return false;
+}
+
} // End llvm namespace
// Generated files will use "namespace PPC". To avoid symbol clash,
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index df2f14a91a7e..3c906d2a51e3 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -41,7 +41,7 @@ public:
: MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/Is64Bit) {}
- void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
+ void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override {
@@ -212,7 +212,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
report_fatal_error("symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
- uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint32_t Value = Writer->getSymbolAddress(*A, Layout);
uint64_t SecAddr =
Writer->getSectionAddress(A_SD->getFragment()->getParent());
FixedValue += SecAddr;
@@ -226,7 +226,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
"' can not be undefined in a subtraction expression");
// FIXME: is Type correct? see include/llvm/Support/MachO.h
- Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
}
// FIXME: does FixedValue get used??
@@ -244,7 +244,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
if (FixupOffset > 0xffffff) {
char Buffer[32];
format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
Twine("Section too large, can't encode "
"r_address (") +
Buffer + ") into 24 bits of scattered "
@@ -282,7 +282,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
MachO::any_relocation_info MRE;
makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR,
Log2Size, IsPCRel, Value2);
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
} else {
// If the offset is more than 24-bits, it won't fit in a scattered
// relocation offset field, so we fall back to using a non-scattered
@@ -296,7 +296,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
}
MachO::any_relocation_info MRE;
makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value);
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
return true;
}
@@ -324,16 +324,16 @@ void PPCMachObjectWriter::RecordPPCRelocation(
// this doesn't seem right for RIT_PPC_BR24
// Get the symbol data, if any.
- const MCSymbolData *SD = nullptr;
+ const MCSymbol *A = nullptr;
if (Target.getSymA())
- SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+ A = &Target.getSymA()->getSymbol();
// See <reloc.h>.
const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
unsigned Index = 0;
- unsigned IsExtern = 0;
unsigned Type = RelocType;
+ const MCSymbol *RelSymbol = nullptr;
if (Target.isAbsolute()) { // constant
// SymbolNum of 0 indicates the absolute section.
//
@@ -344,9 +344,9 @@ void PPCMachObjectWriter::RecordPPCRelocation(
// the above line stolen from ARM, not sure
} else {
// Resolve constant variables.
- if (SD->getSymbol().isVariable()) {
+ if (A->isVariable()) {
int64_t Res;
- if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ if (A->getVariableValue()->EvaluateAsAbsolute(
Res, Layout, Writer->getSectionAddressMap())) {
FixedValue = Res;
return;
@@ -354,20 +354,18 @@ void PPCMachObjectWriter::RecordPPCRelocation(
}
// Check whether we need an external or internal relocation.
- if (Writer->doesSymbolRequireExternRelocation(SD)) {
- IsExtern = 1;
- Index = SD->getIndex();
+ if (Writer->doesSymbolRequireExternRelocation(*A)) {
+ RelSymbol = A;
// For external relocations, make sure to offset the fixup value to
// compensate for the addend of the symbol address, if it was
// undefined. This occurs with weak definitions, for example.
- if (!SD->getSymbol().isUndefined())
- FixedValue -= Layout.getSymbolOffset(SD);
+ if (!A->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(*A);
} else {
// The index is the section ordinal (1-based).
- const MCSectionData &SymSD =
- Asm.getSectionData(SD->getSymbol().getSection());
- Index = SymSD.getOrdinal() + 1;
- FixedValue += Writer->getSectionAddress(&SymSD);
+ const MCSection &Sec = A->getSection();
+ Index = Sec.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&Sec);
}
if (IsPCRel)
FixedValue -= Writer->getSectionAddress(Fragment->getParent());
@@ -375,13 +373,12 @@ void PPCMachObjectWriter::RecordPPCRelocation(
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
- makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, IsExtern,
- Type);
- Writer->addRelocation(Fragment->getParent(), MRE);
+ makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, false, Type);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
-MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
- uint32_t CPUType,
+MCObjectWriter *llvm::createPPCMachObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype) {
return createMachObjectWriter(
new PPCMachObjectWriter(Is64Bit, CPUType, CPUSubtype), OS,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
index 8fb33df3fe9a..ae8d8b4f5dfe 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -34,18 +34,19 @@ namespace llvm {
#ifndef NDEBUG
FunctionPass *createPPCCTRLoopsVerify();
#endif
+ FunctionPass *createPPCLoopDataPrefetchPass();
+ FunctionPass *createPPCLoopPreIncPrepPass(PPCTargetMachine &TM);
+ FunctionPass *createPPCTOCRegDepsPass();
FunctionPass *createPPCEarlyReturnPass();
FunctionPass *createPPCVSXCopyPass();
- FunctionPass *createPPCVSXCopyCleanupPass();
FunctionPass *createPPCVSXFMAMutatePass();
+ FunctionPass *createPPCVSXSwapRemovalPass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
+ FunctionPass *createPPCTLSDynamicCallPass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
- /// \brief Creates an PPC-specific Target Transformation Info pass.
- ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM);
-
void initializePPCVSXFMAMutatePass(PassRegistry&);
extern char &PPCVSXFMAMutateID;
@@ -93,12 +94,7 @@ namespace llvm {
MO_TOC_LO = 7 << 4,
// Symbol for VK_PPC_TLS fixup attached to an ADD instruction
- MO_TLS = 8 << 4,
-
- // Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr
- // call sequences.
- MO_TLSLD = 9 << 4,
- MO_TLSGD = 10 << 4
+ MO_TLS = 8 << 4
};
} // end namespace PPCII
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
index a7fd62c07303..1a02bcca9362 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -18,7 +18,7 @@ include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// PowerPC Subtarget features.
//
-
+
//===----------------------------------------------------------------------===//
// CPU Directives //
//===----------------------------------------------------------------------===//
@@ -86,12 +86,19 @@ def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
"Enable the isel instruction">;
def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
"Enable the popcnt[dw] instructions">;
+def FeatureBPERMD : SubtargetFeature<"bpermd", "HasBPERMD", "true",
+ "Enable the bpermd instruction">;
+def FeatureExtDiv : SubtargetFeature<"extdiv", "HasExtDiv", "true",
+ "Enable extended divide instructions">;
def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
"Enable the ldbrx instruction">;
def FeatureCMPB : SubtargetFeature<"cmpb", "HasCMPB", "true",
"Enable the cmpb instruction">;
+def FeatureICBT : SubtargetFeature<"icbt","HasICBT", "true",
+ "Enable icbt instruction">;
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
- "Enable Book E instructions">;
+ "Enable Book E instructions",
+ [FeatureICBT]>;
def FeatureMSYNC : SubtargetFeature<"msync", "HasOnlyMSYNC", "true",
"Has only the msync instruction instead of sync",
[FeatureBookE]>;
@@ -106,15 +113,66 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
"Enable VSX instructions",
[FeatureAltivec]>;
+def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true",
+ "Enable POWER8 Altivec instructions",
+ [FeatureAltivec]>;
+def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true",
+ "Enable POWER8 Crypto instructions",
+ [FeatureP8Altivec]>;
def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
"Enable POWER8 vector instructions",
- [FeatureVSX, FeatureAltivec]>;
+ [FeatureVSX, FeatureP8Altivec]>;
+def FeatureDirectMove :
+ SubtargetFeature<"direct-move", "HasDirectMove", "true",
+ "Enable Power8 direct move instructions",
+ [FeatureVSX]>;
+def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics",
+ "HasPartwordAtomics", "true",
+ "Enable l[bh]arx and st[bh]cx.">;
+def FeatureInvariantFunctionDescriptors :
+ SubtargetFeature<"invariant-function-descriptors",
+ "HasInvariantFunctionDescriptors", "true",
+ "Assume function descriptors are invariant">;
+def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
+ "Enable Hardware Transactional Memory instructions">;
def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
"Treat mftb as deprecated">;
def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
"Treat vector data stream cache control instructions as deprecated">;
+/* Since new processors generally contain a superset of features of those that
+ came before them, the idea is to make implementations of new processors
+ less error prone and easier to read.
+ Namely:
+ list<SubtargetFeature> Power8FeatureList = ...
+ list<SubtargetFeature> FutureProcessorSpecificFeatureList =
+ [ features that Power8 does not support ]
+ list<SubtargetFeature> FutureProcessorFeatureList =
+ !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList)
+
+ Makes it explicit and obvious what is new in FutureProcesor vs. Power8 as
+ well as providing a single point of definition if the feature set will be
+ used elsewhere.
+*/
+def ProcessorFeatures {
+ list<SubtargetFeature> Power7FeatureList =
+ [DirectivePwr7, FeatureAltivec, FeatureVSX,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */,
+ FeatureBPERMD, FeatureExtDiv,
+ DeprecatedMFTB, DeprecatedDST];
+ list<SubtargetFeature> Power8SpecificFeatures =
+ [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
+ FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic];
+ list<SubtargetFeature> Power8FeatureList =
+ !listconcat(Power7FeatureList, Power8SpecificFeatures);
+}
+
// Note: Future features to add when support is extended to more
// recent ISA levels:
//
@@ -122,16 +180,6 @@ def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
// POPCNTB p5 through p7 popcntb and related instructions
//===----------------------------------------------------------------------===//
-// ABI Selection //
-//===----------------------------------------------------------------------===//
-
-def FeatureELFv1 : SubtargetFeature<"elfv1", "TargetABI", "PPC_ABI_ELFv1",
- "Use the ELFv1 ABI">;
-
-def FeatureELFv2 : SubtargetFeature<"elfv2", "TargetABI", "PPC_ABI_ELFv2",
- "Use the ELFv2 ABI">;
-
-//===----------------------------------------------------------------------===//
// Classes used for relation maps.
//===----------------------------------------------------------------------===//
// RecFormRel - Filter class used to relate non-record-form instructions with
@@ -202,12 +250,12 @@ include "PPCInstrInfo.td"
def : Processor<"generic", G3Itineraries, [Directive32]>;
def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureBookE, FeatureMSYNC,
- DeprecatedMFTB]>;
+ FeatureICBT, FeatureBookE,
+ FeatureMSYNC, DeprecatedMFTB]>;
def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureBookE, FeatureMSYNC,
- DeprecatedMFTB]>;
+ FeatureICBT, FeatureBookE,
+ FeatureMSYNC, DeprecatedMFTB]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
def : Processor<"603", G3Itineraries, [Directive603,
@@ -234,6 +282,7 @@ def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE]>;
def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE]>;
+
def : ProcessorModel<"970", G5Model,
[Directive970, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt,
@@ -247,14 +296,14 @@ def : ProcessorModel<"g5", G5Model,
DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
[DirectiveE500mc, FeatureMFOCRF,
- FeatureSTFIWX, FeatureBookE, FeatureISEL,
- DeprecatedMFTB]>;
+ FeatureSTFIWX, FeatureICBT, FeatureBookE,
+ FeatureISEL, DeprecatedMFTB]>;
def : ProcessorModel<"e5500", PPCE5500Model,
[DirectiveE5500, FeatureMFOCRF, Feature64Bit,
- FeatureSTFIWX, FeatureBookE, FeatureISEL,
- DeprecatedMFTB]>;
+ FeatureSTFIWX, FeatureICBT, FeatureBookE,
+ FeatureISEL, DeprecatedMFTB]>;
def : ProcessorModel<"a2", PPCA2Model,
- [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+ [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
@@ -262,7 +311,7 @@ def : ProcessorModel<"a2", PPCA2Model,
FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit
/*, Feature64BitRegs */, DeprecatedMFTB]>;
def : ProcessorModel<"a2q", PPCA2Model,
- [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+ [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
@@ -303,35 +352,15 @@ def : ProcessorModel<"pwr6x", G5Model,
FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB,
FeatureFPRND, Feature64Bit,
DeprecatedMFTB, DeprecatedDST]>;
-def : ProcessorModel<"pwr7", P7Model,
- [DirectivePwr7, FeatureAltivec, FeatureVSX,
- FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
- FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
- FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */,
- DeprecatedMFTB, DeprecatedDST]>;
-def : ProcessorModel<"pwr8", P8Model,
- [DirectivePwr8, FeatureAltivec, FeatureVSX, FeatureP8Vector,
- FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
- FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
- FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */,
- DeprecatedMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>;
+def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : ProcessorModel<"ppc64", G5Model,
[Directive64, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureFRES,
FeatureFRSQRTE, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
-def : ProcessorModel<"ppc64le", G5Model,
- [Directive64, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureFRES,
- FeatureFRSQRTE, FeatureSTFIWX,
- Feature64Bit /*, Feature64BitRegs */]>;
+def : ProcessorModel<"ppc64le", P8Model, ProcessorFeatures.Power8FeatureList>;
//===----------------------------------------------------------------------===//
// Calling Conventions
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 49fd2f904a24..4f1c3c73e710 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -68,13 +68,12 @@ namespace {
class PPCAsmPrinter : public AsmPrinter {
protected:
MapVector<MCSymbol*, MCSymbol*> TOC;
- const PPCSubtarget &Subtarget;
- uint64_t TOCLabelID;
+ const PPCSubtarget *Subtarget;
StackMaps SM;
public:
- explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer),
- Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0), SM(*this) {}
+ explicit PPCAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), SM(*this) {}
const char *getPassName() const override {
return "PowerPC Assembly Printer";
@@ -99,13 +98,19 @@ namespace {
const MachineInstr &MI);
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
+ void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ Subtarget = &MF.getSubtarget<PPCSubtarget>();
+ return AsmPrinter::runOnMachineFunction(MF);
+ }
};
/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
class PPCLinuxAsmPrinter : public PPCAsmPrinter {
public:
- explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : PPCAsmPrinter(TM, Streamer) {}
+ explicit PPCLinuxAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : PPCAsmPrinter(TM, std::move(Streamer)) {}
const char *getPassName() const override {
return "Linux PPC Assembly Printer";
@@ -124,8 +129,9 @@ namespace {
/// OS X
class PPCDarwinAsmPrinter : public PPCAsmPrinter {
public:
- explicit PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : PPCAsmPrinter(TM, Streamer) {}
+ explicit PPCDarwinAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : PPCAsmPrinter(TM, std::move(Streamer)) {}
const char *getPassName() const override {
return "Darwin PPC Assembly Printer";
@@ -144,6 +150,7 @@ static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) {
case 'r':
case 'f':
+ case 'q': // for QPX
case 'v':
if (RegName[1] == 's')
return RegName + 2;
@@ -156,7 +163,7 @@ static const char *stripRegisterPrefix(const char *RegName) {
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
@@ -164,7 +171,8 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
// Linux assembler (Others?) does not take register mnemonics.
// FIXME - What about special registers used in mfspr/mtspr?
- if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+ if (!Subtarget->isDarwin())
+ RegName = stripRegisterPrefix(RegName);
O << RegName;
return;
}
@@ -279,7 +287,8 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
case 'y': // A memory reference for an X-form instruction
{
const char *RegName = "r0";
- if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+ if (!Subtarget->isDarwin())
+ RegName = stripRegisterPrefix(RegName);
O << RegName << ", ";
printOperand(MI, OpNo, O);
return false;
@@ -311,17 +320,9 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
/// exists for it. If not, create one. Then return a symbol that references
/// the TOC entry.
MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
MCSymbol *&TOCEntry = TOC[Sym];
-
- // To avoid name clash check if the name already exists.
- while (!TOCEntry) {
- if (OutContext.LookupSymbol(Twine(DL->getPrivateGlobalPrefix()) +
- "C" + Twine(TOCLabelID++)) == nullptr) {
- TOCEntry = GetTempSymbol("C", TOCLabelID);
- }
- }
-
+ if (!TOCEntry)
+ TOCEntry = createTempSymbol("C");
return TOCEntry;
}
@@ -400,12 +401,45 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
}
+/// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a
+/// call to __tls_get_addr to the current output stream.
+void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
+ MCSymbolRefExpr::VariantKind VK) {
+ StringRef Name = "__tls_get_addr";
+ MCSymbol *TlsGetAddr = OutContext.getOrCreateSymbol(Name);
+ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+
+ assert(MI->getOperand(0).isReg() &&
+ ((Subtarget->isPPC64() && MI->getOperand(0).getReg() == PPC::X3) ||
+ (!Subtarget->isPPC64() && MI->getOperand(0).getReg() == PPC::R3)) &&
+ "GETtls[ld]ADDR[32] must define GPR3");
+ assert(MI->getOperand(1).isReg() &&
+ ((Subtarget->isPPC64() && MI->getOperand(1).getReg() == PPC::X3) ||
+ (!Subtarget->isPPC64() && MI->getOperand(1).getReg() == PPC::R3)) &&
+ "GETtls[ld]ADDR[32] must read GPR3");
+
+ if (!Subtarget->isPPC64() && !Subtarget->isDarwin() &&
+ TM.getRelocationModel() == Reloc::PIC_)
+ Kind = MCSymbolRefExpr::VK_PLT;
+ const MCSymbolRefExpr *TlsRef =
+ MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext);
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, VK, OutContext);
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(Subtarget->isPPC64() ?
+ PPC::BL8_NOP_TLS : PPC::BL_TLS)
+ .addExpr(TlsRef)
+ .addExpr(SymVar));
+}
+
/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
/// the current output stream.
///
void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
- bool isPPC64 = Subtarget.isPPC64();
+ bool isPPC64 = Subtarget->isPPC64();
bool isDarwin = Triple(TM.getTargetTriple()).isOSDarwin();
const Module *M = MF->getFunction()->getParent();
PICLevel::Level PL = M->getPICLevel();
@@ -416,9 +450,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case TargetOpcode::DBG_VALUE:
llvm_unreachable("Should be handled target independently");
case TargetOpcode::STACKMAP:
- return LowerSTACKMAP(OutStreamer, SM, *MI);
+ return LowerSTACKMAP(*OutStreamer, SM, *MI);
case TargetOpcode::PATCHPOINT:
- return LowerPATCHPOINT(OutStreamer, SM, *MI);
+ return LowerPATCHPOINT(*OutStreamer, SM, *MI);
case PPC::MoveGOTtoLR: {
// Transform %LR = MoveGOTtoLR
@@ -428,7 +462,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// blrl
// This will return the pointer to _GLOBAL_OFFSET_TABLE_@local
MCSymbol *GOTSymbol =
- OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
const MCExpr *OffsExpr =
MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol,
MCSymbolRefExpr::VK_PPC_LOCAL,
@@ -437,7 +471,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutContext);
// Emit the 'bl'.
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL).addExpr(OffsExpr));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL).addExpr(OffsExpr));
return;
}
case PPC::MovePCtoLR:
@@ -449,13 +483,13 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *PICBase = MF->getPICBaseSymbol();
// Emit the 'bl'.
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL)
// FIXME: We would like an efficient form for this, so we don't have to do
// a lot of extra uniquing.
.addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
// Emit the label.
- OutStreamer.EmitLabel(PICBase);
+ OutStreamer->EmitLabel(PICBase);
return;
}
case PPC::UpdateGBR: {
@@ -478,16 +512,16 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Step 1: lwz %Rt, .L$poff - .L$pb(%Ri)
TmpInst.getOperand(1) =
- MCOperand::CreateExpr(MCBinaryExpr::CreateSub(Exp, PB, OutContext));
+ MCOperand::createExpr(MCBinaryExpr::CreateSub(Exp, PB, OutContext));
TmpInst.getOperand(0) = TR;
TmpInst.getOperand(2) = PICR;
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
TmpInst.setOpcode(PPC::ADD4);
TmpInst.getOperand(0) = PICR;
TmpInst.getOperand(1) = TR;
TmpInst.getOperand(2) = PICR;
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case PPC::LWZtoc: {
@@ -515,7 +549,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_GOT,
OutContext);
- TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
} else {
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
@@ -523,12 +557,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_None,
OutContext);
const MCExpr *PB =
- MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".LTOC")),
+ MCSymbolRefExpr::Create(OutContext.getOrCreateSymbol(Twine(".LTOC")),
OutContext);
Exp = MCBinaryExpr::CreateSub(Exp, PB, OutContext);
- TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
}
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case PPC::LDtocJTI:
@@ -560,8 +594,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *Exp =
MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
OutContext);
- TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
- EmitToStreamer(OutStreamer, TmpInst);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
@@ -607,8 +641,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA,
OutContext);
- TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
- EmitToStreamer(OutStreamer, TmpInst);
+ TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case PPC::LDtocL: {
@@ -649,8 +683,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
OutContext);
- TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
- EmitToStreamer(OutStreamer, TmpInst);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case PPC::ADDItocL: {
@@ -683,24 +717,24 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
OutContext);
- TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
- EmitToStreamer(OutStreamer, TmpInst);
+ TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case PPC::ADDISgotTprelHA: {
// Transform: %Xd = ADDISgotTprelHA %X2, <ga:@sym>
// Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(PPC::X2)
- .addExpr(SymGotTprel));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTprel));
return;
}
case PPC::LDgotTprelL:
@@ -716,17 +750,17 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO,
OutContext);
- TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
- EmitToStreamer(OutStreamer, TmpInst);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case PPC::PPC32PICGOT: {
- MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
- MCSymbol *GOTRef = OutContext.CreateTempSymbol();
- MCSymbol *NextInstr = OutContext.CreateTempSymbol();
+ MCSymbol *GOTSymbol = OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ MCSymbol *GOTRef = OutContext.createTempSymbol();
+ MCSymbol *NextInstr = OutContext.createTempSymbol();
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL)
// FIXME: We would like an efficient form for this, so we don't have to do
// a lot of extra uniquing.
.addExpr(MCSymbolRefExpr::Create(NextInstr, OutContext)));
@@ -734,52 +768,52 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol, OutContext),
MCSymbolRefExpr::Create(GOTRef, OutContext),
OutContext);
- OutStreamer.EmitLabel(GOTRef);
- OutStreamer.EmitValue(OffsExpr, 4);
- OutStreamer.EmitLabel(NextInstr);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR)
- .addReg(MI->getOperand(0).getReg()));
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LWZ)
- .addReg(MI->getOperand(1).getReg())
- .addImm(0)
- .addReg(MI->getOperand(0).getReg()));
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADD4)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addReg(MI->getOperand(0).getReg()));
+ OutStreamer->EmitLabel(GOTRef);
+ OutStreamer->EmitValue(OffsExpr, 4);
+ OutStreamer->EmitLabel(NextInstr);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR)
+ .addReg(MI->getOperand(0).getReg()));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LWZ)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0)
+ .addReg(MI->getOperand(0).getReg()));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADD4)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MI->getOperand(0).getReg()));
return;
}
case PPC::PPC32GOT: {
- MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ MCSymbol *GOTSymbol = OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
const MCExpr *SymGotTlsL =
MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO,
OutContext);
const MCExpr *SymGotTlsHA =
MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI)
- .addReg(MI->getOperand(0).getReg())
- .addExpr(SymGotTlsL));
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(0).getReg())
- .addExpr(SymGotTlsHA));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI)
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(SymGotTlsL));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(SymGotTlsHA));
return;
}
case PPC::ADDIStlsgdHA: {
// Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym>
// Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsGD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(PPC::X2)
- .addExpr(SymGotTlsGD));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsGD));
return;
}
case PPC::ADDItlsgdL:
@@ -791,32 +825,40 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
- const MCExpr *SymGotTlsGD =
- MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ?
- MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO :
- MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
- OutContext);
- EmitToStreamer(OutStreamer,
- MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ const MCExpr *SymGotTlsGD = MCSymbolRefExpr::Create(
+ MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO
+ : MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
+ OutContext);
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymGotTlsGD));
return;
}
+ case PPC::GETtlsADDR:
+ // Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd)
+ case PPC::GETtlsADDR32: {
+ // Transform: %R3 = GETtlsADDR32 %R3, <ga:@sym>
+ // Into: BL_TLS __tls_get_addr(sym at tlsgd)@PLT
+ EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSGD);
+ return;
+ }
case PPC::ADDIStlsldHA: {
// Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym>
// Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsLD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(PPC::X2)
- .addExpr(SymGotTlsLD));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsLD));
return;
}
case PPC::ADDItlsldL:
@@ -828,16 +870,24 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
- const MCExpr *SymGotTlsLD =
- MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ?
- MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO :
- MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
- OutContext);
- EmitToStreamer(OutStreamer,
- MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addExpr(SymGotTlsLD));
+ const MCExpr *SymGotTlsLD = MCSymbolRefExpr::Create(
+ MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO
+ : MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
+ OutContext);
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsLD));
+ return;
+ }
+ case PPC::GETtlsldADDR:
+ // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsld)
+ case PPC::GETtlsldADDR32: {
+ // Transform: %R3 = GETtlsldADDR32 %R3, <ga:@sym>
+ // Into: BL_TLS __tls_get_addr(sym at tlsld)@PLT
+ EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSLD);
return;
}
case PPC::ADDISdtprelHA:
@@ -852,11 +902,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
OutContext);
- EmitToStreamer(OutStreamer,
- MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)
- .addReg(MI->getOperand(0).getReg())
- .addReg(Subtarget.isPPC64() ? PPC::X3 : PPC::R3)
- .addExpr(SymDtprel));
+ EmitToStreamer(
+ *OutStreamer,
+ MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(Subtarget->isPPC64() ? PPC::X3 : PPC::R3)
+ .addExpr(SymDtprel));
return;
}
case PPC::ADDIdtprelL:
@@ -871,41 +922,41 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
OutContext);
- EmitToStreamer(OutStreamer,
- MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addExpr(SymDtprel));
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymDtprel));
return;
}
case PPC::MFOCRF:
case PPC::MFOCRF8:
- if (!Subtarget.hasMFOCRF()) {
+ if (!Subtarget->hasMFOCRF()) {
// Transform: %R3 = MFOCRF %CR7
// Into: %R3 = MFCR ;; cr7
unsigned NewOpcode =
MI->getOpcode() == PPC::MFOCRF ? PPC::MFCR : PPC::MFCR8;
- OutStreamer.AddComment(PPCInstPrinter::
- getRegisterName(MI->getOperand(1).getReg()));
- EmitToStreamer(OutStreamer, MCInstBuilder(NewOpcode)
+ OutStreamer->AddComment(PPCInstPrinter::
+ getRegisterName(MI->getOperand(1).getReg()));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(NewOpcode)
.addReg(MI->getOperand(0).getReg()));
return;
}
break;
case PPC::MTOCRF:
case PPC::MTOCRF8:
- if (!Subtarget.hasMFOCRF()) {
+ if (!Subtarget->hasMFOCRF()) {
// Transform: %CR7 = MTOCRF %R3
// Into: MTCRF mask, %R3 ;; cr7
unsigned NewOpcode =
MI->getOpcode() == PPC::MTOCRF ? PPC::MTCRF : PPC::MTCRF8;
unsigned Mask = 0x80 >> OutContext.getRegisterInfo()
->getEncodingValue(MI->getOperand(0).getReg());
- OutStreamer.AddComment(PPCInstPrinter::
- getRegisterName(MI->getOperand(0).getReg()));
- EmitToStreamer(OutStreamer, MCInstBuilder(NewOpcode)
- .addImm(Mask)
- .addReg(MI->getOperand(1).getReg()));
+ OutStreamer->AddComment(PPCInstPrinter::
+ getRegisterName(MI->getOperand(0).getReg()));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(NewOpcode)
+ .addImm(Mask)
+ .addReg(MI->getOperand(1).getReg()));
return;
}
break;
@@ -919,7 +970,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// suite shows a handful of test cases that fail this check for
// Darwin. Those need to be investigated before this sanity test
// can be enabled for those subtargets.
- if (!Subtarget.isDarwin()) {
+ if (!Subtarget->isDarwin()) {
unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
const MachineOperand &MO = MI->getOperand(OpNum);
if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4)
@@ -931,32 +982,32 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
}
void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
- if (Subtarget.isELFv2ABI()) {
+ if (static_cast<const PPCTargetMachine &>(TM).isELFv2ABI()) {
PPCTargetStreamer *TS =
- static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+ static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
if (TS)
TS->emitAbiVersion(2);
}
- if (Subtarget.isPPC64() || TM.getRelocationModel() != Reloc::PIC_)
+ if (static_cast<const PPCTargetMachine &>(TM).isPPC64() ||
+ TM.getRelocationModel() != Reloc::PIC_)
return AsmPrinter::EmitStartOfAsmFile(M);
if (M.getPICLevel() == PICLevel::Small)
return AsmPrinter::EmitStartOfAsmFile(M);
- OutStreamer.SwitchSection(OutContext.getELFSection(".got2",
- ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
- SectionKind::getReadOnly()));
+ OutStreamer->SwitchSection(OutContext.getELFSection(
+ ".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC));
- MCSymbol *TOCSym = OutContext.GetOrCreateSymbol(Twine(".LTOC"));
- MCSymbol *CurrentPos = OutContext.CreateTempSymbol();
+ MCSymbol *TOCSym = OutContext.getOrCreateSymbol(Twine(".LTOC"));
+ MCSymbol *CurrentPos = OutContext.createTempSymbol();
- OutStreamer.EmitLabel(CurrentPos);
+ OutStreamer->EmitLabel(CurrentPos);
// The GOT pointer points to the middle of the GOT, in order to reference the
// entire 64kB range. 0x8000 is the midpoint.
@@ -965,121 +1016,93 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
MCConstantExpr::Create(0x8000, OutContext),
OutContext);
- OutStreamer.EmitAssignment(TOCSym, tocExpr);
+ OutStreamer->EmitAssignment(TOCSym, tocExpr);
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
}
void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
// linux/ppc32 - Normal entry label.
- if (!Subtarget.isPPC64() &&
+ if (!Subtarget->isPPC64() &&
(TM.getRelocationModel() != Reloc::PIC_ ||
MF->getFunction()->getParent()->getPICLevel() == PICLevel::Small))
return AsmPrinter::EmitFunctionEntryLabel();
- if (!Subtarget.isPPC64()) {
+ if (!Subtarget->isPPC64()) {
const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
if (PPCFI->usesPICBase()) {
MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol();
MCSymbol *PICBase = MF->getPICBaseSymbol();
- OutStreamer.EmitLabel(RelocSymbol);
+ OutStreamer->EmitLabel(RelocSymbol);
const MCExpr *OffsExpr =
MCBinaryExpr::CreateSub(
- MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".LTOC")),
+ MCSymbolRefExpr::Create(OutContext.getOrCreateSymbol(Twine(".LTOC")),
OutContext),
MCSymbolRefExpr::Create(PICBase, OutContext),
OutContext);
- OutStreamer.EmitValue(OffsExpr, 4);
- OutStreamer.EmitLabel(CurrentFnSym);
+ OutStreamer->EmitValue(OffsExpr, 4);
+ OutStreamer->EmitLabel(CurrentFnSym);
return;
} else
return AsmPrinter::EmitFunctionEntryLabel();
}
// ELFv2 ABI - Normal entry label.
- if (Subtarget.isELFv2ABI())
+ if (Subtarget->isELFv2ABI())
return AsmPrinter::EmitFunctionEntryLabel();
// Emit an official procedure descriptor.
- MCSectionSubPair Current = OutStreamer.getCurrentSection();
- const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
- ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
- SectionKind::getReadOnly());
- OutStreamer.SwitchSection(Section);
- OutStreamer.EmitLabel(CurrentFnSym);
- OutStreamer.EmitValueToAlignment(8);
- MCSymbol *Symbol1 =
- OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName()));
+ MCSectionSubPair Current = OutStreamer->getCurrentSection();
+ MCSectionELF *Section = OutStreamer->getContext().getELFSection(
+ ".opd", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ OutStreamer->SwitchSection(Section);
+ OutStreamer->EmitLabel(CurrentFnSym);
+ OutStreamer->EmitValueToAlignment(8);
+ MCSymbol *Symbol1 = CurrentFnSymForSize;
// Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
// entry point.
- OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
- 8 /*size*/);
- MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
+ OutStreamer->EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
+ 8 /*size*/);
+ MCSymbol *Symbol2 = OutContext.getOrCreateSymbol(StringRef(".TOC."));
// Generates a R_PPC64_TOC relocation for TOC base insertion.
- OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2,
- MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext),
- 8/*size*/);
+ OutStreamer->EmitValue(
+ MCSymbolRefExpr::Create(Symbol2, MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext),
+ 8/*size*/);
// Emit a null environment pointer.
- OutStreamer.EmitIntValue(0, 8 /* size */);
- OutStreamer.SwitchSection(Current.first, Current.second);
-
- MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
- ".L." + Twine(CurrentFnSym->getName()));
- OutStreamer.EmitLabel(RealFnSym);
- CurrentFnSymForSize = RealFnSym;
+ OutStreamer->EmitIntValue(0, 8 /* size */);
+ OutStreamer->SwitchSection(Current.first, Current.second);
}
bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
bool isPPC64 = TD->getPointerSizeInBits() == 64;
PPCTargetStreamer &TS =
- static_cast<PPCTargetStreamer &>(*OutStreamer.getTargetStreamer());
+ static_cast<PPCTargetStreamer &>(*OutStreamer->getTargetStreamer());
if (!TOC.empty()) {
- const MCSectionELF *Section;
-
+ MCSectionELF *Section;
+
if (isPPC64)
- Section = OutStreamer.getContext().getELFSection(".toc",
- ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
- SectionKind::getReadOnly());
- else
- Section = OutStreamer.getContext().getELFSection(".got2",
- ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
- SectionKind::getReadOnly());
- OutStreamer.SwitchSection(Section);
+ Section = OutStreamer->getContext().getELFSection(
+ ".toc", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ else
+ Section = OutStreamer->getContext().getELFSection(
+ ".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ OutStreamer->SwitchSection(Section);
for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
E = TOC.end(); I != E; ++I) {
- OutStreamer.EmitLabel(I->second);
+ OutStreamer->EmitLabel(I->second);
MCSymbol *S = I->first;
if (isPPC64)
TS.emitTCEntry(*S);
else
- OutStreamer.EmitSymbolValue(S, 4);
- }
- }
-
- MachineModuleInfoELF &MMIELF =
- MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- // L_foo$stub:
- OutStreamer.EmitLabel(Stubs[i].first);
- // .long _foo
- OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(),
- OutContext),
- isPPC64 ? 8 : 4/*size*/);
+ OutStreamer->EmitSymbolValue(S, 4);
}
-
- Stubs.clear();
- OutStreamer.AddBlankLine();
}
return AsmPrinter::doFinalization(M);
@@ -1103,36 +1126,36 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
//
// This ensures we have r2 set up correctly while executing the function
// body, no matter which entry point is called.
- if (Subtarget.isELFv2ABI()
+ if (Subtarget->isELFv2ABI()
// Only do all that if the function uses r2 in the first place.
&& !MF->getRegInfo().use_empty(PPC::X2)) {
- MCSymbol *GlobalEntryLabel = OutContext.CreateTempSymbol();
- OutStreamer.EmitLabel(GlobalEntryLabel);
+ MCSymbol *GlobalEntryLabel = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(GlobalEntryLabel);
const MCSymbolRefExpr *GlobalEntryLabelExp =
MCSymbolRefExpr::Create(GlobalEntryLabel, OutContext);
- MCSymbol *TOCSymbol = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
+ MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC."));
const MCExpr *TOCDeltaExpr =
MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(TOCSymbol, OutContext),
GlobalEntryLabelExp, OutContext);
const MCExpr *TOCDeltaHi =
PPCMCExpr::CreateHa(TOCDeltaExpr, false, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
- .addReg(PPC::X2)
- .addReg(PPC::X12)
- .addExpr(TOCDeltaHi));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
+ .addReg(PPC::X2)
+ .addReg(PPC::X12)
+ .addExpr(TOCDeltaHi));
const MCExpr *TOCDeltaLo =
PPCMCExpr::CreateLo(TOCDeltaExpr, false, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI)
- .addReg(PPC::X2)
- .addReg(PPC::X2)
- .addExpr(TOCDeltaLo));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI)
+ .addReg(PPC::X2)
+ .addReg(PPC::X2)
+ .addExpr(TOCDeltaLo));
- MCSymbol *LocalEntryLabel = OutContext.CreateTempSymbol();
- OutStreamer.EmitLabel(LocalEntryLabel);
+ MCSymbol *LocalEntryLabel = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(LocalEntryLabel);
const MCSymbolRefExpr *LocalEntryLabelExp =
MCSymbolRefExpr::Create(LocalEntryLabel, OutContext);
const MCExpr *LocalOffsetExp =
@@ -1140,7 +1163,7 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
GlobalEntryLabelExp, OutContext);
PPCTargetStreamer *TS =
- static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+ static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
if (TS)
TS->emitLocalEntry(CurrentFnSym, LocalOffsetExp);
@@ -1158,9 +1181,9 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() {
// FIXME: We should fill in the eight-byte mandatory fields as described in
// the PPC64 ELF ABI (this is a low-priority item because GDB does not
// currently make use of these fields).
- if (Subtarget.isPPC64()) {
- OutStreamer.EmitIntValue(0, 4/*size*/);
- OutStreamer.EmitIntValue(0, 8/*size*/);
+ if (Subtarget->isPPC64()) {
+ OutStreamer->EmitIntValue(0, 4/*size*/);
+ OutStreamer->EmitIntValue(0, 8/*size*/);
}
}
@@ -1189,74 +1212,89 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
"ppc64le"
};
- unsigned Directive = Subtarget.getDarwinDirective();
- if (Subtarget.hasMFOCRF() && Directive < PPC::DIR_970)
- Directive = PPC::DIR_970;
- if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
- Directive = PPC::DIR_7400;
- if (Subtarget.isPPC64() && Directive < PPC::DIR_64)
- Directive = PPC::DIR_64;
+ // Get the numerically largest directive.
+ // FIXME: How should we merge darwin directives?
+ unsigned Directive = PPC::DIR_NONE;
+ for (const Function &F : M) {
+ const PPCSubtarget &STI = TM.getSubtarget<PPCSubtarget>(F);
+ unsigned FDir = STI.getDarwinDirective();
+ Directive = Directive > FDir ? FDir : STI.getDarwinDirective();
+ if (STI.hasMFOCRF() && Directive < PPC::DIR_970)
+ Directive = PPC::DIR_970;
+ if (STI.hasAltivec() && Directive < PPC::DIR_7400)
+ Directive = PPC::DIR_7400;
+ if (STI.isPPC64() && Directive < PPC::DIR_64)
+ Directive = PPC::DIR_64;
+ }
+
assert(Directive <= PPC::DIR_64 && "Directive out of range.");
assert(Directive < array_lengthof(CPUDirectives) &&
"CPUDirectives[] might not be up-to-date!");
PPCTargetStreamer &TStreamer =
- *static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+ *static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
TStreamer.emitMachine(CPUDirectives[Directive]);
// Prime text sections so they are adjacent. This reduces the likelihood a
// large data or debug section causes a branch to exceed 16M limit.
const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
- OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
+ OutStreamer->SwitchSection(TLOFMacho.getTextCoalSection());
if (TM.getRelocationModel() == Reloc::PIC_) {
- OutStreamer.SwitchSection(
+ OutStreamer->SwitchSection(
OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
MachO::S_SYMBOL_STUBS |
MachO::S_ATTR_PURE_INSTRUCTIONS,
32, SectionKind::getText()));
} else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
- OutStreamer.SwitchSection(
+ OutStreamer->SwitchSection(
OutContext.getMachOSection("__TEXT","__symbol_stub1",
MachO::S_SYMBOL_STUBS |
MachO::S_ATTR_PURE_INSTRUCTIONS,
16, SectionKind::getText()));
}
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
}
static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
// Remove $stub suffix, add $lazy_ptr.
StringRef NoStub = Sym->getName().substr(0, Sym->getName().size()-5);
- return Ctx.GetOrCreateSymbol(NoStub + "$lazy_ptr");
+ return Ctx.getOrCreateSymbol(NoStub + "$lazy_ptr");
}
static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
// Add $tmp suffix to $stub, yielding $stub$tmp.
- return Ctx.GetOrCreateSymbol(Sym->getName() + "$tmp");
+ return Ctx.getOrCreateSymbol(Sym->getName() + "$tmp");
}
void PPCDarwinAsmPrinter::
EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
- bool isPPC64 =
- TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits() == 64;
- bool isDarwin = Subtarget.isDarwin();
-
+ bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+
+ // Construct a local MCSubtargetInfo and shadow EmitToStreamer here.
+ // This is because the MachineFunction won't exist (but have not yet been
+ // freed) and since we're at the global level we can use the default
+ // constructed subtarget.
+ std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
+ TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
+ auto EmitToStreamer = [&STI] (MCStreamer &S, const MCInst &Inst) {
+ S.EmitInstruction(Inst, *STI);
+ };
+
const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
// .lazy_symbol_pointer
- const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
-
+ MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
+
// Output stubs for dynamically-linked functions
if (TM.getRelocationModel() == Reloc::PIC_) {
- const MCSection *StubSection =
- OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
- MachO::S_SYMBOL_STUBS |
- MachO::S_ATTR_PURE_INSTRUCTIONS,
- 32, SectionKind::getText());
+ MCSection *StubSection = OutContext.getMachOSection(
+ "__TEXT", "__picsymbolstub1",
+ MachO::S_SYMBOL_STUBS | MachO::S_ATTR_PURE_INSTRUCTIONS, 32,
+ SectionKind::getText());
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- OutStreamer.SwitchSection(StubSection);
+ OutStreamer->SwitchSection(StubSection);
EmitAlignment(4);
MCSymbol *Stub = Stubs[i].first;
@@ -1264,8 +1302,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext);
- OutStreamer.EmitLabel(Stub);
- OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+ OutStreamer->EmitLabel(Stub);
+ OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext);
const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext);
@@ -1273,110 +1311,108 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
MCBinaryExpr::CreateSub(LazyPtrExpr, Anon, OutContext);
// mflr r0
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
// bcl 20, 31, AnonSymbol
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCLalways).addExpr(Anon));
- OutStreamer.EmitLabel(AnonSymbol);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCLalways).addExpr(Anon));
+ OutStreamer->EmitLabel(AnonSymbol);
// mflr r11
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
// addis r11, r11, ha16(LazyPtr - AnonSymbol)
- const MCExpr *SubHa16 = PPCMCExpr::CreateHa(Sub, isDarwin, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
+ const MCExpr *SubHa16 = PPCMCExpr::CreateHa(Sub, true, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
.addReg(PPC::R11)
.addReg(PPC::R11)
.addExpr(SubHa16));
// mtlr r0
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
// ldu r12, lo16(LazyPtr - AnonSymbol)(r11)
// lwzu r12, lo16(LazyPtr - AnonSymbol)(r11)
- const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, isDarwin, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, true, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
.addReg(PPC::R12)
.addExpr(SubLo16).addExpr(SubLo16)
.addReg(PPC::R11));
// mtctr r12
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
// bctr
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTR));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCTR));
- OutStreamer.SwitchSection(LSPSection);
- OutStreamer.EmitLabel(LazyPtr);
- OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+ OutStreamer->SwitchSection(LSPSection);
+ OutStreamer->EmitLabel(LazyPtr);
+ OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
MCSymbol *DyldStubBindingHelper =
- OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ OutContext.getOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
if (isPPC64) {
// .quad dyld_stub_binding_helper
- OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+ OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 8);
} else {
// .long dyld_stub_binding_helper
- OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+ OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 4);
}
}
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
return;
}
-
- const MCSection *StubSection =
- OutContext.getMachOSection("__TEXT","__symbol_stub1",
- MachO::S_SYMBOL_STUBS |
- MachO::S_ATTR_PURE_INSTRUCTIONS,
- 16, SectionKind::getText());
+
+ MCSection *StubSection = OutContext.getMachOSection(
+ "__TEXT", "__symbol_stub1",
+ MachO::S_SYMBOL_STUBS | MachO::S_ATTR_PURE_INSTRUCTIONS, 16,
+ SectionKind::getText());
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
MCSymbol *Stub = Stubs[i].first;
MCSymbol *RawSym = Stubs[i].second.getPointer();
MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext);
- OutStreamer.SwitchSection(StubSection);
+ OutStreamer->SwitchSection(StubSection);
EmitAlignment(4);
- OutStreamer.EmitLabel(Stub);
- OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+ OutStreamer->EmitLabel(Stub);
+ OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
// lis r11, ha16(LazyPtr)
const MCExpr *LazyPtrHa16 =
- PPCMCExpr::CreateHa(LazyPtrExpr, isDarwin, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LIS)
+ PPCMCExpr::CreateHa(LazyPtrExpr, true, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LIS)
.addReg(PPC::R11)
.addExpr(LazyPtrHa16));
// ldu r12, lo16(LazyPtr)(r11)
// lwzu r12, lo16(LazyPtr)(r11)
const MCExpr *LazyPtrLo16 =
- PPCMCExpr::CreateLo(LazyPtrExpr, isDarwin, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ PPCMCExpr::CreateLo(LazyPtrExpr, true, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
.addReg(PPC::R12)
.addExpr(LazyPtrLo16).addExpr(LazyPtrLo16)
.addReg(PPC::R11));
// mtctr r12
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
// bctr
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTR));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCTR));
- OutStreamer.SwitchSection(LSPSection);
- OutStreamer.EmitLabel(LazyPtr);
- OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+ OutStreamer->SwitchSection(LSPSection);
+ OutStreamer->EmitLabel(LazyPtr);
+ OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
MCSymbol *DyldStubBindingHelper =
- OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ OutContext.getOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
if (isPPC64) {
// .quad dyld_stub_binding_helper
- OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+ OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 8);
} else {
// .long dyld_stub_binding_helper
- OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+ OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 4);
}
}
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
}
bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
- bool isPPC64 =
- TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits() == 64;
+ bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
// Darwin/PPC always uses mach-o.
const TargetLoweringObjectFileMachO &TLOFMacho =
@@ -1409,19 +1445,19 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
// Output macho stubs for external and common global variables.
if (!Stubs.empty()) {
// Switch with ".non_lazy_symbol_pointer" directive.
- OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
EmitAlignment(isPPC64 ? 3 : 2);
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
// L_foo$stub:
- OutStreamer.EmitLabel(Stubs[i].first);
+ OutStreamer->EmitLabel(Stubs[i].first);
// .indirect_symbol _foo
MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
- OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
+ OutStreamer->EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/);
+ OutStreamer->EmitIntValue(0, isPPC64 ? 8 : 4/*size*/);
else
// Internal to current translation unit.
//
@@ -1429,32 +1465,32 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
// need to be indirect and pc-rel. We accomplish this by using NLPs.
// However, sometimes the types are local to the file. So we need to
// fill in the value for the NLP in those cases.
- OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
- OutContext),
+ OutStreamer->EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+ OutContext),
isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
}
Stubs = MMIMacho.GetHiddenGVStubList();
if (!Stubs.empty()) {
- OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ OutStreamer->SwitchSection(getObjFileLowering().getDataSection());
EmitAlignment(isPPC64 ? 3 : 2);
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
// L_foo$stub:
- OutStreamer.EmitLabel(Stubs[i].first);
+ OutStreamer->EmitLabel(Stubs[i].first);
// .long _foo
- OutStreamer.EmitValue(MCSymbolRefExpr::
- Create(Stubs[i].second.getPointer(),
- OutContext),
- isPPC64 ? 8 : 4/*size*/);
+ OutStreamer->EmitValue(MCSymbolRefExpr::
+ Create(Stubs[i].second.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
}
// Funny Darwin hack: This flag tells the linker that no global symbols
@@ -1462,7 +1498,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
// implementation of multiple entry points). If this doesn't occur, the
// linker can safely perform dead code stripping. Since LLVM never generates
// code that does this, it is always safe to set.
- OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
return AsmPrinter::doFinalization(M);
}
@@ -1471,13 +1507,12 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
/// for a MachineFunction to the given output stream, in a format that the
/// Darwin assembler can deal with.
///
-static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
- MCStreamer &Streamer) {
- const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
-
- if (Subtarget->isDarwin())
- return new PPCDarwinAsmPrinter(tm, Streamer);
- return new PPCLinuxAsmPrinter(tm, Streamer);
+static AsmPrinter *
+createPPCAsmPrinterPass(TargetMachine &tm,
+ std::unique_ptr<MCStreamer> &&Streamer) {
+ if (Triple(tm.getTargetTriple()).isMacOSX())
+ return new PPCDarwinAsmPrinter(tm, std::move(Streamer));
+ return new PPCLinuxAsmPrinter(tm, std::move(Streamer));
}
// Force static initialization.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index 167f3355eb27..69afd681d404 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -30,6 +30,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
@@ -42,7 +43,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -94,8 +94,8 @@ namespace {
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
@@ -146,7 +146,7 @@ namespace {
INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
@@ -168,12 +168,12 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() {
#endif // NDEBUG
bool PPCCTRLoops::runOnFunction(Function &F) {
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>();
+ DL = &F.getParent()->getDataLayout();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
bool MadeChange = false;
@@ -229,7 +229,8 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
if (!TM)
return true;
- const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
+ const TargetLowering *TLI =
+ TM->getSubtargetImpl(*BB->getParent())->getTargetLowering();
if (Function *F = CI->getCalledFunction()) {
// Most intrinsics don't become function calls, but some might.
@@ -399,7 +400,8 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
if (!TM)
return true;
- const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
+ const TargetLowering *TLI =
+ TM->getSubtargetImpl(*BB->getParent())->getTargetLowering();
if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
return true;
@@ -530,7 +532,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// selected branch.
MadeChange = true;
- SCEVExpander SCEVE(*SE, "loopcnt");
+ SCEVExpander SCEVE(*SE, Preheader->getModule()->getDataLayout(), "loopcnt");
LLVMContext &C = SE->getContext();
Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
Type::getInt32Ty(C);
@@ -551,7 +553,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
IRBuilder<> CondBuilder(CountedExitBranch);
Value *DecFunc =
Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
- Value *NewCond = CondBuilder.CreateCall(DecFunc);
+ Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
Value *OldCond = CountedExitBranch->getCondition();
CountedExitBranch->setCondition(NewCond);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 56176f145332..874a6fce0656 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -55,13 +55,18 @@ def RetCC_PPC : CallingConv<[
// only the ELFv2 ABI fully utilizes all these registers.
CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
-
+
+ // QPX vectors are returned in QF1 and QF2.
+ CCIfType<[v4f64, v4f32, v4i1],
+ CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
+
// Vector types returned as "direct" go into V2 .. V9; note that only the
// ELFv2 ABI fully utilizes all these registers.
- CCIfType<[v16i8, v8i16, v4i32, v4f32],
- CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
- CCIfType<[v2f64, v2i64],
- CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
+ CCIfSubtarget<"hasAltivec()",
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
+ CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
]>;
// No explicit register is specified for the AnyReg calling convention. The
@@ -108,10 +113,13 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
- CCIfType<[v16i8, v8i16, v4i32, v4f32],
- CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
- CCIfType<[v2f64, v2i64],
- CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
+ CCIfType<[v4f64, v4f32, v4i1],
+ CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
+ CCIfSubtarget<"hasAltivec()",
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
+ CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
]>;
//===----------------------------------------------------------------------===//
@@ -144,6 +152,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[
// alignment and size as doubles.
CCIfType<[f32,f64], CCAssignToStack<8, 8>>,
+ // QPX vectors that are stored in double precision need 32-byte alignment.
+ CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>,
+
// Vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>
]>;
@@ -158,12 +169,17 @@ def CC_PPC32_SVR4_VarArg : CallingConv<[
// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
// put vector arguments in vector registers before putting them on the stack.
def CC_PPC32_SVR4 : CallingConv<[
+ // QPX vectors mirror the scalar FP convention.
+ CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
+ CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
+
// The first 12 Vector arguments are passed in AltiVec registers.
- CCIfType<[v16i8, v8i16, v4i32, v4f32],
- CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
- CCIfType<[v2f64, v2i64],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
+ CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
+ V8, V9, V10, V11, V12, V13]>>>,
+ CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9,
- VSH10, VSH11, VSH12, VSH13]>>,
+ VSH10, VSH11, VSH12, VSH13]>>>,
CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
@@ -224,9 +240,12 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
F27, F28, F29, F30, F31, CR2, CR3, CR4
)>;
-
def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>;
+def CSR_SVR464_R2 : CalleeSavedRegs<(add CSR_SVR464, X2)>;
+
+def CSR_SVR464_R2_Altivec : CalleeSavedRegs<(add CSR_SVR464_Altivec, X2)>;
+
def CSR_NoRegs : CalleeSavedRegs<(add)>;
def CSR_64_AllRegs: CalleeSavedRegs<(add X0, (sequence "X%u", 3, 10),
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
new file mode 100644
index 000000000000..fc89753ed94e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -0,0 +1,202 @@
+//===------------- PPCEarlyReturn.cpp - Form Early Returns ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass that form early (predicated) returns. If-conversion handles some of
+// this, but this pass picks up some remaining cases.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-early-ret"
+STATISTIC(NumBCLR, "Number of early conditional returns");
+STATISTIC(NumBLR, "Number of early returns");
+
+namespace llvm {
+ void initializePPCEarlyReturnPass(PassRegistry&);
+}
+
+namespace {
+ // PPCEarlyReturn pass - For simple functions without epilogue code, move
+ // returns up, and create conditional returns, to avoid unnecessary
+ // branch-to-blr sequences.
+ struct PPCEarlyReturn : public MachineFunctionPass {
+ static char ID;
+ PPCEarlyReturn() : MachineFunctionPass(ID) {
+ initializePPCEarlyReturnPass(*PassRegistry::getPassRegistry());
+ }
+
+ const TargetInstrInfo *TII;
+
+protected:
+ bool processBlock(MachineBasicBlock &ReturnMBB) {
+ bool Changed = false;
+
+ MachineBasicBlock::iterator I = ReturnMBB.begin();
+ I = ReturnMBB.SkipPHIsAndLabels(I);
+
+ // The block must be essentially empty except for the blr.
+ if (I == ReturnMBB.end() ||
+ (I->getOpcode() != PPC::BLR && I->getOpcode() != PPC::BLR8) ||
+ I != ReturnMBB.getLastNonDebugInstr())
+ return Changed;
+
+ SmallVector<MachineBasicBlock*, 8> PredToRemove;
+ for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(),
+ PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) {
+ bool OtherReference = false, BlockChanged = false;
+ for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) {
+ MachineInstrBuilder MIB;
+ if (J->getOpcode() == PPC::B) {
+ if (J->getOperand(0).getMBB() == &ReturnMBB) {
+ // This is an unconditional branch to the return. Replace the
+ // branch with a blr.
+ MIB =
+ BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode()));
+ MIB.copyImplicitOps(I);
+ MachineBasicBlock::iterator K = J--;
+ K->eraseFromParent();
+ BlockChanged = true;
+ ++NumBLR;
+ continue;
+ }
+ } else if (J->getOpcode() == PPC::BCC) {
+ if (J->getOperand(2).getMBB() == &ReturnMBB) {
+ // This is a conditional branch to the return. Replace the branch
+ // with a bclr.
+ MIB = BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR))
+ .addImm(J->getOperand(0).getImm())
+ .addReg(J->getOperand(1).getReg());
+ MIB.copyImplicitOps(I);
+ MachineBasicBlock::iterator K = J--;
+ K->eraseFromParent();
+ BlockChanged = true;
+ ++NumBCLR;
+ continue;
+ }
+ } else if (J->getOpcode() == PPC::BC || J->getOpcode() == PPC::BCn) {
+ if (J->getOperand(1).getMBB() == &ReturnMBB) {
+ // This is a conditional branch to the return. Replace the branch
+ // with a bclr.
+ MIB = BuildMI(**PI, J, J->getDebugLoc(),
+ TII->get(J->getOpcode() == PPC::BC ?
+ PPC::BCLR : PPC::BCLRn))
+ .addReg(J->getOperand(0).getReg());
+ MIB.copyImplicitOps(I);
+ MachineBasicBlock::iterator K = J--;
+ K->eraseFromParent();
+ BlockChanged = true;
+ ++NumBCLR;
+ continue;
+ }
+ } else if (J->isBranch()) {
+ if (J->isIndirectBranch()) {
+ if (ReturnMBB.hasAddressTaken())
+ OtherReference = true;
+ } else
+ for (unsigned i = 0; i < J->getNumOperands(); ++i)
+ if (J->getOperand(i).isMBB() &&
+ J->getOperand(i).getMBB() == &ReturnMBB)
+ OtherReference = true;
+ } else if (!J->isTerminator() && !J->isDebugValue())
+ break;
+
+ if (J == (*PI)->begin())
+ break;
+
+ --J;
+ }
+
+ if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB))
+ OtherReference = true;
+
+ // Predecessors are stored in a vector and can't be removed here.
+ if (!OtherReference && BlockChanged) {
+ PredToRemove.push_back(*PI);
+ }
+
+ if (BlockChanged)
+ Changed = true;
+ }
+
+ for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i)
+ PredToRemove[i]->removeSuccessor(&ReturnMBB);
+
+ if (Changed && !ReturnMBB.hasAddressTaken()) {
+ // We now might be able to merge this blr-only block into its
+ // by-layout predecessor.
+ if (ReturnMBB.pred_size() == 1) {
+ MachineBasicBlock &PrevMBB = **ReturnMBB.pred_begin();
+ if (PrevMBB.isLayoutSuccessor(&ReturnMBB) && PrevMBB.canFallThrough()) {
+ // Move the blr into the preceding block.
+ PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I);
+ PrevMBB.removeSuccessor(&ReturnMBB);
+ }
+ }
+
+ if (ReturnMBB.pred_empty())
+ ReturnMBB.eraseFromParent();
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ TII = MF.getSubtarget().getInstrInfo();
+
+ bool Changed = false;
+
+ // If the function does not have at least two blocks, then there is
+ // nothing to do.
+ if (MF.size() < 2)
+ return Changed;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE,
+ "PowerPC Early-Return Creation", false, false)
+
+char PPCEarlyReturn::ID = 0;
+FunctionPass*
+llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); }
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 13bd0c7be2c0..0b8e23c4ebf8 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -17,6 +17,7 @@
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCallingConv.h"
#include "PPCISelLowering.h"
+#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Optional.h"
@@ -85,18 +86,20 @@ typedef struct Address {
class PPCFastISel final : public FastISel {
const TargetMachine &TM;
+ const PPCSubtarget *PPCSubTarget;
+ PPCFunctionInfo *PPCFuncInfo;
const TargetInstrInfo &TII;
const TargetLowering &TLI;
- const PPCSubtarget *PPCSubTarget;
LLVMContext *Context;
public:
explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo)
: FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
- TII(*TM.getSubtargetImpl()->getInstrInfo()),
- TLI(*TM.getSubtargetImpl()->getTargetLowering()),
- PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()),
+ PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
+ PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
+ TII(*PPCSubTarget->getInstrInfo()),
+ TLI(*PPCSubTarget->getTargetLowering()),
Context(&FuncInfo.Fn->getContext()) {}
// Backend specific FastISel code.
@@ -141,6 +144,7 @@ class PPCFastISel final : public FastISel {
private:
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
+ bool isValueAvailable(const Value *V) const;
bool isVSFRCRegister(unsigned Register) const {
return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID;
}
@@ -280,6 +284,17 @@ bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
return false;
}
+bool PPCFastISel::isValueAvailable(const Value *V) const {
+ if (!isa<Instruction>(V))
+ return true;
+
+ const auto *I = cast<Instruction>(V);
+ if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+ return true;
+
+ return false;
+}
+
// Given a value Obj, create an Address object Addr that represents its
// address. Return false if we can't handle it.
bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
@@ -672,8 +687,18 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
case PPC::STFS: Opc = PPC::STFSX; break;
case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
- .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg);
+
+ auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg);
+
+ // If we have an index register defined we use it in the store inst,
+ // otherwise we use X0 as base as it makes the vector instructions to
+ // use zero in the computation of the effective address regardless the
+ // content of the register.
+ if (IndexReg)
+ MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
+ else
+ MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
}
return true;
@@ -718,30 +743,31 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
// For now, just try the simplest case where it's fed by a compare.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
- Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
- if (!OptPPCPred)
- return false;
-
- PPC::Predicate PPCPred = OptPPCPred.getValue();
+ if (isValueAvailable(CI)) {
+ Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
+ if (!OptPPCPred)
+ return false;
- // Take advantage of fall-through opportunities.
- if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
- std::swap(TBB, FBB);
- PPCPred = PPC::InvertPredicate(PPCPred);
- }
+ PPC::Predicate PPCPred = OptPPCPred.getValue();
- unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
+ // Take advantage of fall-through opportunities.
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ PPCPred = PPC::InvertPredicate(PPCPred);
+ }
- if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
- CondReg))
- return false;
+ unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
- BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
- .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
- fastEmitBranch(FBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TBB);
- return true;
+ if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
+ CondReg))
+ return false;
+ BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
+ .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
+ fastEmitBranch(FBB, DbgLoc);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+ }
} else if (const ConstantInt *CI =
dyn_cast<ConstantInt>(BI->getCondition())) {
uint64_t Imm = CI->getZExtValue();
@@ -945,6 +971,8 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
}
// Attempt to fast-select an integer-to-floating-point conversion.
+// FIXME: Once fast-isel has better support for VSX, conversions using
+// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
MVT DstVT;
Type *DstTy = I->getType();
@@ -1052,6 +1080,8 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
}
// Attempt to fast-select a floating-point-to-integer conversion.
+// FIXME: Once fast-isel has better support for VSX, conversions using
+// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
MVT DstVT, SrcVT;
Type *DstTy = I->getType();
@@ -1234,9 +1264,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
// Reserve space for the linkage area on the stack.
- bool isELFv2ABI = PPCSubTarget->isELFv2ABI();
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
- isELFv2ABI);
+ unsigned LinkageSize = PPCSubTarget->getFrameLowering()->getLinkageSize();
CCInfo.AllocateStack(LinkageSize, 8);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
@@ -1275,7 +1303,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
// Prepare to assign register arguments. Every argument uses up a
// GPR protocol register even if it's passed in a floating-point
- // register.
+ // register (unless we're using the fast calling convention).
unsigned NextGPR = PPC::X3;
unsigned NextFPR = PPC::F1;
@@ -1325,7 +1353,8 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
unsigned ArgReg;
if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
ArgReg = NextFPR++;
- ++NextGPR;
+ if (CC != CallingConv::Fast)
+ ++NextGPR;
} else
ArgReg = NextGPR++;
@@ -1432,6 +1461,9 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
RetVT != MVT::i8)
return false;
+ else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits())
+ // We can't handle boolean returns when CR bits are in use.
+ return false;
// FIXME: No multi-register return values yet.
if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
@@ -1523,13 +1555,14 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
MIB.addReg(RegArgs[II], RegState::Implicit);
- // Direct calls in the ELFv2 ABI need the TOC register live into the call.
- if (PPCSubTarget->isELFv2ABI())
- MIB.addReg(PPC::X2, RegState::Implicit);
+ // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
+ // into the call.
+ PPCFuncInfo->setUsesTOCBasePtr();
+ MIB.addReg(PPC::X2, RegState::Implicit);
// Add a register mask with the call-preserved registers. Proper
// defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
CLI.Call = MIB;
@@ -1863,6 +1896,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ PPCFuncInfo->setUsesTOCBasePtr();
// For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT),
@@ -1912,6 +1946,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
if (GV->isThreadLocal())
return 0;
+ PPCFuncInfo->setUsesTOCBasePtr();
// For small code model, generate a simple TOC load.
if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc),
@@ -2297,13 +2332,10 @@ namespace llvm {
// Create the fast instruction selector for PowerPC64 ELF.
FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) {
- const TargetMachine &TM = FuncInfo.MF->getTarget();
-
// Only available on 64-bit ELF for now.
- const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
- if (Subtarget->isPPC64() && Subtarget->isSVR4ABI())
+ const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
+ if (Subtarget.isPPC64() && Subtarget.isSVR4ABI())
return new PPCFastISel(FuncInfo, LibInfo);
-
return nullptr;
}
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 1b850478c88c..b4008e4a886a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -16,6 +16,7 @@
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -36,10 +37,58 @@ static const uint16_t VRRegNo[] = {
PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
+static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
+ if (STI.isDarwinABI())
+ return STI.isPPC64() ? 16 : 8;
+ // SVR4 ABI:
+ return STI.isPPC64() ? 16 : 4;
+}
+
+static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
+ return STI.isELFv2ABI() ? 24 : 40;
+}
+
+static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
+ // For the Darwin ABI:
+ // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
+ // for saving the frame pointer (if needed.) While the published ABI has
+ // not used this slot since at least MacOSX 10.2, there is older code
+ // around that does use it, and that needs to continue to work.
+ if (STI.isDarwinABI())
+ return STI.isPPC64() ? -8U : -4U;
+
+ // SVR4 ABI: First slot in the general register save area.
+ return STI.isPPC64() ? -8U : -4U;
+}
+
+static unsigned computeLinkageSize(const PPCSubtarget &STI) {
+ if (STI.isDarwinABI() || STI.isPPC64())
+ return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
+
+ // SVR4 ABI:
+ return 8;
+}
+
+static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
+ if (STI.isDarwinABI())
+ return STI.isPPC64() ? -16U : -8U;
+
+ // SVR4 ABI: First slot in the general register save area.
+ return STI.isPPC64()
+ ? -16U
+ : (STI.getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ ? -12U
+ : -8U;
+}
+
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
- (STI.hasQPX() || STI.isBGQ()) ? 32 : 16, 0),
- Subtarget(STI) {}
+ STI.getPlatformStackAlignment(), 0),
+ Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
+ TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
+ FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
+ LinkageSize(computeLinkageSize(Subtarget)),
+ BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {}
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
@@ -355,6 +404,20 @@ static bool hasNonRISpills(const MachineFunction &MF) {
return FuncInfo->hasNonRISpills();
}
+/// MustSaveLR - Return true if this function requires that we save the LR
+/// register onto the stack in the prolog and restore it in the epilog of the
+/// function.
+static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
+ const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
+
+ // We need a save/restore of LR if there is any def of LR (which is
+ // defined by calls, including the PIC setup sequence), or if there is
+ // some use of the LR stack slot (e.g. for builtin_return_address).
+ // (LR comes in 32 and 64 bit versions.)
+ MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
+ return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
+}
+
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
@@ -372,15 +435,15 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
// If we are a leaf function, and use up to 224 bytes of stack space,
// don't have a frame pointer, calls, or dynamic alloca then we do not need
// to adjust the stack pointer (we fit in the Red Zone).
// The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
// stackless code if all local vars are reg-allocated.
- bool DisableRedZone = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone);
+ bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
+ unsigned LR = RegInfo->getRARegister();
if (!DisableRedZone &&
(Subtarget.isPPC64() || // 32-bit SVR4, no stack-
!Subtarget.isSVR4ABI() || // allocated locals.
@@ -388,6 +451,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
+ !MustSaveLR(MF, LR) &&
!RegInfo->hasBasePointer(MF)) { // No special alignment.
// No need for frame
if (UpdateMF)
@@ -399,9 +463,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
// Maximum call frame needs to be at least big enough for linkage area.
- unsigned minCallFrameSize = getLinkageSize(Subtarget.isPPC64(),
- Subtarget.isDarwinABI(),
- Subtarget.isELFv2ABI());
+ unsigned minCallFrameSize = getLinkageSize();
maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
// If we have dynamic alloca then maxCallFrameSize needs to be aligned so
@@ -444,8 +506,7 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
// Naked functions have no stack frame pushed, so we don't have a frame
// pointer.
- if (MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::Naked))
+ if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
return false;
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
@@ -461,7 +522,7 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
bool HasBP = RegInfo->hasBasePointer(MF);
unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg;
@@ -494,29 +555,28 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
}
}
-void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+void PPCFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
DebugLoc dl;
bool needsCFI = MMI.hasDebugInfo() ||
MF.getFunction()->needsUnwindTableEntry();
- bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
// Get the ABI.
- bool isDarwinABI = Subtarget.isDarwinABI();
bool isSVR4ABI = Subtarget.isSVR4ABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
- assert((isDarwinABI || isSVR4ABI) &&
+ assert((Subtarget.isDarwinABI() || isSVR4ABI) &&
"Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
// Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
@@ -582,7 +642,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
"FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
- int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+ int LROffset = getReturnSaveOffset();
int FPOffset = 0;
if (HasFP) {
@@ -592,8 +652,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
assert(FPIndex && "No Frame Pointer Save Slot!");
FPOffset = FFI->getObjectOffset(FPIndex);
} else {
- FPOffset =
- PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ FPOffset = getFramePointerSaveOffset();
}
}
@@ -605,10 +664,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
assert(BPIndex && "No Base Pointer Save Slot!");
BPOffset = FFI->getObjectOffset(BPIndex);
} else {
- BPOffset =
- PPCFrameLowering::getBasePointerSaveOffset(isPPC64,
- isDarwinABI,
- isPIC);
+ BPOffset = getBasePointerSaveOffset();
}
}
@@ -864,9 +920,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
assert(MBBI != MBB.end() && "Returning block has no terminator");
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl;
@@ -890,9 +946,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
// Get the ABI.
- bool isDarwinABI = Subtarget.isDarwinABI();
bool isSVR4ABI = Subtarget.isSVR4ABI();
- bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
// Check if the link register (LR) has been saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -920,7 +974,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
: PPC::ADD4 );
- int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+ int LROffset = getReturnSaveOffset();
int FPOffset = 0;
if (HasFP) {
@@ -930,8 +984,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
assert(FPIndex && "No Frame Pointer Save Slot!");
FPOffset = FFI->getObjectOffset(FPIndex);
} else {
- FPOffset =
- PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ FPOffset = getFramePointerSaveOffset();
}
}
@@ -943,10 +996,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
assert(BPIndex && "No Base Pointer Save Slot!");
BPOffset = FFI->getObjectOffset(BPIndex);
} else {
- BPOffset =
- PPCFrameLowering::getBasePointerSaveOffset(isPPC64,
- isDarwinABI,
- isPIC);
+ BPOffset = getBasePointerSaveOffset();
}
}
@@ -1108,25 +1158,11 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-/// MustSaveLR - Return true if this function requires that we save the LR
-/// register onto the stack in the prolog and restore it in the epilog of the
-/// function.
-static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
- const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
-
- // We need a save/restore of LR if there is any def of LR (which is
- // defined by calls, including the PIC setup sequence), or if there is
- // some use of the LR stack slot (e.g. for builtin_return_address).
- // (LR comes in 32 and 64 bit versions.)
- MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
- return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
-}
-
void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *) const {
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
// Save and clear the LR state.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -1139,13 +1175,12 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FPSI = FI->getFramePointerSaveIndex();
bool isPPC64 = Subtarget.isPPC64();
bool isDarwinABI = Subtarget.isDarwinABI();
- bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
MachineFrameInfo *MFI = MF.getFrameInfo();
// If the frame pointer save index hasn't been defined yet.
if (!FPSI && needsFP(MF)) {
// Find out what the fix offset of the frame pointer save area.
- int FPOffset = getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ int FPOffset = getFramePointerSaveOffset();
// Allocate the frame index for frame pointer save area.
FPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
// Save the result.
@@ -1154,7 +1189,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int BPSI = FI->getBasePointerSaveIndex();
if (!BPSI && RegInfo->hasBasePointer(MF)) {
- int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI, isPIC);
+ int BPOffset = getBasePointerSaveOffset();
// Allocate the frame index for the base pointer save area.
BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
// Save the result.
@@ -1266,7 +1301,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
}
PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
int64_t LowerBound = 0;
@@ -1310,7 +1345,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
}
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
if (RegInfo->hasBasePointer(MF)) {
HasGPSaveArea = true;
@@ -1458,7 +1493,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
DebugLoc DL;
bool CRSpilled = false;
MachineInstrBuilder CRMIB;
@@ -1519,8 +1554,7 @@ restoreCRs(bool isPPC64, bool is31,
const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
MachineFunction *MF = MBB.getParent();
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
+ const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
DebugLoc DL;
unsigned RestoreOp, MoveReg;
@@ -1552,8 +1586,7 @@ restoreCRs(bool isPPC64, bool is31,
void PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
if (MF.getTarget().Options.GuaranteedTailCallOpt &&
I->getOpcode() == PPC::ADJCALLSTACKUP) {
// Add (actually subtract) back the amount the callee popped on return.
@@ -1603,7 +1636,7 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
bool CR2Spilled = false;
bool CR3Spilled = false;
bool CR4Spilled = false;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index c4825882182d..28d074ecd79d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -23,6 +23,11 @@ class PPCSubtarget;
class PPCFrameLowering: public TargetFrameLowering {
const PPCSubtarget &Subtarget;
+ const unsigned ReturnSaveOffset;
+ const unsigned TOCSaveOffset;
+ const unsigned FramePointerSaveOffset;
+ const unsigned LinkageSize;
+ const unsigned BasePointerSaveOffset;
public:
PPCFrameLowering(const PPCSubtarget &STI);
@@ -33,7 +38,7 @@ public:
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
bool hasFP(const MachineFunction &MF) const override;
@@ -67,56 +72,23 @@ public:
/// getReturnSaveOffset - Return the previous frame offset to save the
/// return address.
- static unsigned getReturnSaveOffset(bool isPPC64, bool isDarwinABI) {
- if (isDarwinABI)
- return isPPC64 ? 16 : 8;
- // SVR4 ABI:
- return isPPC64 ? 16 : 4;
- }
+ unsigned getReturnSaveOffset() const { return ReturnSaveOffset; }
/// getTOCSaveOffset - Return the previous frame offset to save the
/// TOC register -- 64-bit SVR4 ABI only.
- static unsigned getTOCSaveOffset(bool isELFv2ABI) {
- return isELFv2ABI ? 24 : 40;
- }
+ unsigned getTOCSaveOffset() const { return TOCSaveOffset; }
/// getFramePointerSaveOffset - Return the previous frame offset to save the
/// frame pointer.
- static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
- // For the Darwin ABI:
- // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
- // for saving the frame pointer (if needed.) While the published ABI has
- // not used this slot since at least MacOSX 10.2, there is older code
- // around that does use it, and that needs to continue to work.
- if (isDarwinABI)
- return isPPC64 ? -8U : -4U;
-
- // SVR4 ABI: First slot in the general register save area.
- return isPPC64 ? -8U : -4U;
- }
+ unsigned getFramePointerSaveOffset() const { return FramePointerSaveOffset; }
/// getBasePointerSaveOffset - Return the previous frame offset to save the
/// base pointer.
- static unsigned getBasePointerSaveOffset(bool isPPC64,
- bool isDarwinABI,
- bool isPIC) {
- if (isDarwinABI)
- return isPPC64 ? -16U : -8U;
-
- // SVR4 ABI: First slot in the general register save area.
- return isPPC64 ? -16U : isPIC ? -12U : -8U;
- }
+ unsigned getBasePointerSaveOffset() const { return BasePointerSaveOffset; }
/// getLinkageSize - Return the size of the PowerPC ABI linkage area.
///
- static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI,
- bool isELFv2ABI) {
- if (isDarwinABI || isPPC64)
- return (isELFv2ABI ? 4 : 6) * (isPPC64 ? 8 : 4);
-
- // SVR4 ABI:
- return 8;
- }
+ unsigned getLinkageSize() const { return LinkageSize; }
const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index d9b242cad265..7234e30fa73e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -160,7 +160,7 @@ unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
// new group.
if (isLoadAfterStore(SU) && CurSlots < 6) {
unsigned Directive =
- DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
// If we're using a special group-terminating nop, then we need only one.
if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
Directive == PPC::DIR_PWR8 )
@@ -220,7 +220,7 @@ void PPCDispatchGroupSBHazardRecognizer::Reset() {
void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
unsigned Directive =
- DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
// If the group has now filled all of its slots, or if we're using a special
// group-terminating nop, the group is complete.
if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 75ab3430b49d..afc1f36ad152 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -42,11 +42,15 @@ using namespace llvm;
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
-cl::opt<bool> UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
- cl::desc("use aggressive ppc isel for bit permutations"), cl::Hidden);
-cl::opt<bool> BPermRewriterNoMasking("ppc-bit-perm-rewriter-stress-rotates",
- cl::desc("stress rotate selection in aggressive ppc isel for "
- "bit permutations"), cl::Hidden);
+static cl::opt<bool>
+ UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
+ cl::desc("use aggressive ppc isel for bit permutations"),
+ cl::Hidden);
+static cl::opt<bool> BPermRewriterNoMasking(
+ "ppc-bit-perm-rewriter-stress-rotates",
+ cl::desc("stress rotate selection in aggressive ppc isel for "
+ "bit permutations"),
+ cl::Hidden);
namespace llvm {
void initializePPCDAGToDAGISelPass(PassRegistry&);
@@ -59,22 +63,20 @@ namespace {
///
class PPCDAGToDAGISel : public SelectionDAGISel {
const PPCTargetMachine &TM;
- const PPCTargetLowering *PPCLowering;
const PPCSubtarget *PPCSubTarget;
+ const PPCTargetLowering *PPCLowering;
unsigned GlobalBaseReg;
public:
explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
- : SelectionDAGISel(tm), TM(tm),
- PPCLowering(TM.getSubtargetImpl()->getTargetLowering()),
- PPCSubTarget(TM.getSubtargetImpl()) {
+ : SelectionDAGISel(tm), TM(tm) {
initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
- PPCLowering = TM.getSubtargetImpl()->getTargetLowering();
- PPCSubTarget = TM.getSubtargetImpl();
+ PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
+ PPCLowering = PPCSubTarget->getTargetLowering();
SelectionDAGISel::runOnMachineFunction(MF);
if (!PPCSubTarget->isSVR4ABI())
@@ -88,28 +90,21 @@ namespace {
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
- inline SDValue getI32Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
}
/// getI64Imm - Return a target constant with the specified value, of type
/// i64.
- inline SDValue getI64Imm(uint64_t Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i64);
+ inline SDValue getI64Imm(uint64_t Imm, SDLoc dl) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
}
/// getSmallIPtrImm - Return a target constant of pointer type.
- inline SDValue getSmallIPtrImm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, PPCLowering->getPointerTy());
+ inline SDValue getSmallIPtrImm(unsigned Imm, SDLoc dl) {
+ return CurDAG->getTargetConstant(Imm, dl, PPCLowering->getPointerTy());
}
- /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
- /// with any number of 0s on either side. The 1s are allowed to wrap from
- /// LSB to MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.
- /// 0x0F0F0000 is not, since all 1s are not contiguous.
- static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME);
-
-
/// isRotateAndMask - Returns true if Mask and Shift can be folded into a
/// rotate and mask opcode and mask operation.
static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
@@ -184,20 +179,35 @@ namespace {
/// register can be improved, but it is wrong to substitute Reg+Reg for
/// Reg in an asm, because the load or store opcode would have to change.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override {
- // We need to make sure that this one operand does not end up in r0
- // (because we might end up lowering this as 0(%op)).
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
- const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
- SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
- SDValue NewOp =
- SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
- SDLoc(Op), Op.getValueType(),
- Op, RC), 0);
-
- OutOps.push_back(NewOp);
- return false;
+
+ switch(ConstraintID) {
+ default:
+ errs() << "ConstraintID: " << ConstraintID << "\n";
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_es:
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_o:
+ case InlineAsm::Constraint_Q:
+ case InlineAsm::Constraint_Z:
+ case InlineAsm::Constraint_Zy:
+ // We need to make sure that this one operand does not end up in r0
+ // (because we might end up lowering this as 0(%op)).
+ const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
+ const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
+ SDLoc dl(Op);
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
+ SDValue NewOp =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ dl, Op.getValueType(),
+ Op, RC), 0);
+
+ OutOps.push_back(NewOp);
+ return false;
+ }
+ return true;
}
void InsertVRSaveCode(MachineFunction &MF);
@@ -221,6 +231,8 @@ private:
bool AllUsersSelectZero(SDNode *N);
void SwapAllSelectUsers(SDNode *N);
+
+ SDNode *transferMemOperands(SDNode *N, SDNode *Result);
};
}
@@ -258,7 +270,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
- const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
MachineBasicBlock &EntryBB = *Fn.begin();
DebugLoc dl;
// Emit the following code into the entry block:
@@ -294,7 +306,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
if (!GlobalBaseReg) {
- const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = MF->front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
@@ -313,7 +325,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
BuildMI(FirstMBB, MBBI, dl,
- TII.get(PPC::UpdateGBR)).addReg(GlobalBaseReg)
+ TII.get(PPC::UpdateGBR), GlobalBaseReg)
.addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
}
@@ -395,33 +407,9 @@ SDNode *PPCDAGToDAGISel::getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
if (SN->hasOneUse())
return CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
- getSmallIPtrImm(Offset));
+ getSmallIPtrImm(Offset, dl));
return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
- getSmallIPtrImm(Offset));
-}
-
-bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
- if (!Val)
- return false;
-
- if (isShiftedMask_32(Val)) {
- // look for the first non-zero bit
- MB = countLeadingZeros(Val);
- // look for the first zero bit after the run of ones
- ME = countLeadingZeros((Val - 1) ^ Val);
- return true;
- } else {
- Val = ~Val; // invert mask
- if (isShiftedMask_32(Val)) {
- // effectively look for the first zero bit
- ME = countLeadingZeros(Val) - 1;
- // effectively look for the first one bit after the run of zeros
- MB = countLeadingZeros((Val - 1) ^ Val) + 1;
- return true;
- }
- }
- // no run present
- return false;
+ getSmallIPtrImm(Offset, dl));
}
bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
@@ -536,8 +524,8 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
}
SH &= 31;
- SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB),
- getI32Imm(ME) };
+ SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
}
}
@@ -665,8 +653,8 @@ static SDNode *SelectInt64Direct(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) {
unsigned Lo = Imm & 0xFFFF;
unsigned Hi = (Imm >> 16) & 0xFFFF;
- auto getI32Imm = [CurDAG](unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ auto getI32Imm = [CurDAG, dl](unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
};
// Simple value.
@@ -756,8 +744,8 @@ static SDNode *SelectInt64(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) {
if (!RMin)
return SelectInt64Direct(CurDAG, dl, Imm);
- auto getI32Imm = [CurDAG](unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ auto getI32Imm = [CurDAG, dl](unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
};
SDValue Val = SDValue(SelectInt64Direct(CurDAG, dl, MatImm), 0);
@@ -1207,8 +1195,8 @@ class BitPermutationSelector {
}
}
- SDValue getI32Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ SDValue getI32Imm(unsigned Imm, SDLoc dl) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
}
uint64_t getZerosMask() {
@@ -1280,7 +1268,8 @@ class BitPermutationSelector {
SDValue VRot;
if (VRI.RLAmt) {
SDValue Ops[] =
- { VRI.V, getI32Imm(VRI.RLAmt), getI32Imm(0), getI32Imm(31) };
+ { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
+ getI32Imm(31, dl) };
VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
Ops), 0);
} else {
@@ -1290,10 +1279,10 @@ class BitPermutationSelector {
SDValue ANDIVal, ANDISVal;
if (ANDIMask != 0)
ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
- VRot, getI32Imm(ANDIMask)), 0);
+ VRot, getI32Imm(ANDIMask, dl)), 0);
if (ANDISMask != 0)
ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
- VRot, getI32Imm(ANDISMask)), 0);
+ VRot, getI32Imm(ANDISMask, dl)), 0);
SDValue TotalVal;
if (!ANDIVal)
@@ -1339,8 +1328,10 @@ class BitPermutationSelector {
if (VRI.RLAmt) {
if (InstCnt) *InstCnt += 1;
SDValue Ops[] =
- { VRI.V, getI32Imm(VRI.RLAmt), getI32Imm(0), getI32Imm(31) };
- Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
+ { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
+ getI32Imm(31, dl) };
+ Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
+ 0);
} else {
Res = VRI.V;
}
@@ -1360,13 +1351,15 @@ class BitPermutationSelector {
for (auto &BG : BitGroups) {
if (!Res) {
SDValue Ops[] =
- { BG.V, getI32Imm(BG.RLAmt), getI32Imm(Bits.size() - BG.EndIdx - 1),
- getI32Imm(Bits.size() - BG.StartIdx - 1) };
+ { BG.V, getI32Imm(BG.RLAmt, dl),
+ getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
+ getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
} else {
SDValue Ops[] =
- { Res, BG.V, getI32Imm(BG.RLAmt), getI32Imm(Bits.size() - BG.EndIdx - 1),
- getI32Imm(Bits.size() - BG.StartIdx - 1) };
+ { Res, BG.V, getI32Imm(BG.RLAmt, dl),
+ getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
+ getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
}
}
@@ -1385,10 +1378,10 @@ class BitPermutationSelector {
SDValue ANDIVal, ANDISVal;
if (ANDIMask != 0)
ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
- Res, getI32Imm(ANDIMask)), 0);
+ Res, getI32Imm(ANDIMask, dl)), 0);
if (ANDISMask != 0)
ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
- Res, getI32Imm(ANDISMask)), 0);
+ Res, getI32Imm(ANDISMask, dl)), 0);
if (!ANDIVal)
Res = ANDISVal;
@@ -1439,27 +1432,27 @@ class BitPermutationSelector {
assert(InstMaskStart >= 32 && "Mask cannot start out of range");
assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
SDValue Ops[] =
- { V, getI32Imm(RLAmt), getI32Imm(InstMaskStart - 32),
- getI32Imm(InstMaskEnd - 32) };
+ { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl),
+ getI32Imm(InstMaskEnd - 32, dl) };
return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
Ops), 0);
}
if (InstMaskEnd == 63) {
SDValue Ops[] =
- { V, getI32Imm(RLAmt), getI32Imm(InstMaskStart) };
+ { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
}
if (InstMaskStart == 0) {
SDValue Ops[] =
- { V, getI32Imm(RLAmt), getI32Imm(InstMaskEnd) };
+ { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskEnd, dl) };
return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
}
if (InstMaskEnd == 63 - RLAmt) {
SDValue Ops[] =
- { V, getI32Imm(RLAmt), getI32Imm(InstMaskStart) };
+ { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
}
@@ -1500,15 +1493,15 @@ class BitPermutationSelector {
assert(InstMaskStart >= 32 && "Mask cannot start out of range");
assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
SDValue Ops[] =
- { Base, V, getI32Imm(RLAmt), getI32Imm(InstMaskStart - 32),
- getI32Imm(InstMaskEnd - 32) };
+ { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl),
+ getI32Imm(InstMaskEnd - 32, dl) };
return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
Ops), 0);
}
if (InstMaskEnd == 63 - RLAmt) {
SDValue Ops[] =
- { Base, V, getI32Imm(RLAmt), getI32Imm(InstMaskStart) };
+ { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
}
@@ -1655,10 +1648,10 @@ class BitPermutationSelector {
SDValue ANDIVal, ANDISVal;
if (ANDIMask != 0)
ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
- VRot, getI32Imm(ANDIMask)), 0);
+ VRot, getI32Imm(ANDIMask, dl)), 0);
if (ANDISMask != 0)
ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
- VRot, getI32Imm(ANDISMask)), 0);
+ VRot, getI32Imm(ANDISMask, dl)), 0);
if (!ANDIVal)
TotalVal = ANDISVal;
@@ -1805,10 +1798,10 @@ class BitPermutationSelector {
SDValue ANDIVal, ANDISVal;
if (ANDIMask != 0)
ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
- Res, getI32Imm(ANDIMask)), 0);
+ Res, getI32Imm(ANDIMask, dl)), 0);
if (ANDISMask != 0)
ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
- Res, getI32Imm(ANDISMask)), 0);
+ Res, getI32Imm(ANDISMask, dl)), 0);
if (!ANDIVal)
Res = ANDISVal;
@@ -1953,11 +1946,13 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
// SETEQ/SETNE comparison with 16-bit immediate, fold it.
if (isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
- getI32Imm(Imm & 0xFFFF)), 0);
+ getI32Imm(Imm & 0xFFFF, dl)),
+ 0);
// If this is a 16-bit signed immediate, fold it.
if (isInt<16>((int)Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
- getI32Imm(Imm & 0xFFFF)), 0);
+ getI32Imm(Imm & 0xFFFF, dl)),
+ 0);
// For non-equality comparisons, the default code would materialize the
// constant, then compare against it, like this:
@@ -1969,21 +1964,22 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
// cmplwi cr0,r0,0x5678
// beq cr0,L6
SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
- getI32Imm(Imm >> 16)), 0);
+ getI32Imm(Imm >> 16, dl)), 0);
return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
- getI32Imm(Imm & 0xFFFF)), 0);
+ getI32Imm(Imm & 0xFFFF, dl)), 0);
}
Opc = PPC::CMPLW;
} else if (ISD::isUnsignedIntSetCC(CC)) {
if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
- getI32Imm(Imm & 0xFFFF)), 0);
+ getI32Imm(Imm & 0xFFFF, dl)), 0);
Opc = PPC::CMPLW;
} else {
short SImm;
if (isIntS16Immediate(RHS, SImm))
return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
- getI32Imm((int)SImm & 0xFFFF)),
+ getI32Imm((int)SImm & 0xFFFF,
+ dl)),
0);
Opc = PPC::CMPW;
}
@@ -1994,11 +1990,13 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
// SETEQ/SETNE comparison with 16-bit immediate, fold it.
if (isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
- getI32Imm(Imm & 0xFFFF)), 0);
+ getI32Imm(Imm & 0xFFFF, dl)),
+ 0);
// If this is a 16-bit signed immediate, fold it.
if (isInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
- getI32Imm(Imm & 0xFFFF)), 0);
+ getI32Imm(Imm & 0xFFFF, dl)),
+ 0);
// For non-equality comparisons, the default code would materialize the
// constant, then compare against it, like this:
@@ -2011,22 +2009,23 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
// beq cr0,L6
if (isUInt<32>(Imm)) {
SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
- getI64Imm(Imm >> 16)), 0);
+ getI64Imm(Imm >> 16, dl)), 0);
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
- getI64Imm(Imm & 0xFFFF)), 0);
+ getI64Imm(Imm & 0xFFFF, dl)),
+ 0);
}
}
Opc = PPC::CMPLD;
} else if (ISD::isUnsignedIntSetCC(CC)) {
if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
- getI64Imm(Imm & 0xFFFF)), 0);
+ getI64Imm(Imm & 0xFFFF, dl)), 0);
Opc = PPC::CMPLD;
} else {
short SImm;
if (isIntS16Immediate(RHS, SImm))
return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
- getI64Imm(SImm & 0xFFFF)),
+ getI64Imm(SImm & 0xFFFF, dl)),
0);
Opc = PPC::CMPD;
}
@@ -2101,7 +2100,7 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
-// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
+// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
bool HasVSX, bool &Swap, bool &Negate) {
Swap = false;
@@ -2180,6 +2179,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPEQUH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPEQUW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPEQUD;
break;
case ISD::SETGT:
if (VecVT == MVT::v16i8)
@@ -2188,6 +2189,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPGTSH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTSW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPGTSD;
break;
case ISD::SETUGT:
if (VecVT == MVT::v16i8)
@@ -2196,6 +2199,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPGTUH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTUW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPGTUD;
break;
default:
break;
@@ -2222,26 +2227,29 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
default: break;
case ISD::SETEQ: {
Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
- SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) };
+ SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
+ getI32Imm(31, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
case ISD::SETNE: {
if (isPPC64) break;
SDValue AD =
SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
- Op, getI32Imm(~0U)), 0);
+ Op, getI32Imm(~0U, dl)), 0);
return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
AD.getValue(1));
}
case ISD::SETLT: {
- SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
+ getI32Imm(31, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
case ISD::SETGT: {
SDValue T =
SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
- SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
+ getI32Imm(31, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
}
@@ -2252,34 +2260,35 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
case ISD::SETEQ:
if (isPPC64) break;
Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
- Op, getI32Imm(1)), 0);
+ Op, getI32Imm(1, dl)), 0);
return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
SDValue(CurDAG->getMachineNode(PPC::LI, dl,
MVT::i32,
- getI32Imm(0)), 0),
- Op.getValue(1));
+ getI32Imm(0, dl)),
+ 0), Op.getValue(1));
case ISD::SETNE: {
if (isPPC64) break;
Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
- Op, getI32Imm(~0U));
+ Op, getI32Imm(~0U, dl));
return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0),
Op, SDValue(AD, 1));
}
case ISD::SETLT: {
SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
- getI32Imm(1)), 0);
+ getI32Imm(1, dl)), 0);
SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
Op), 0);
- SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
+ getI32Imm(31, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
case ISD::SETGT: {
- SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
- Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
- 0);
+ SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
+ getI32Imm(31, dl) };
+ Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
- getI32Imm(1));
+ getI32Imm(1, dl));
}
}
}
@@ -2291,6 +2300,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
// Altivec Vector compare instructions do not set any CR register by default and
// vector compare operations return the same type as the operands.
if (LHS.getValueType().isVector()) {
+ if (PPCSubTarget->hasQPX())
+ return nullptr;
+
EVT VecVT = LHS.getValueType();
bool Swap, Negate;
unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
@@ -2326,15 +2338,23 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
CCReg), 0);
- SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
- getI32Imm(31), getI32Imm(31) };
+ SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
+ getI32Imm(31, dl), getI32Imm(31, dl) };
if (!Inv)
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
// Get the specified bit.
SDValue Tmp =
SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
- return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
+}
+
+SDNode *PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ return Result;
}
@@ -2394,7 +2414,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue ShiftAmt =
CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
- getConstantIntValue(), N->getValueType(0));
+ getConstantIntValue(), dl,
+ N->getValueType(0));
if (N->getValueType(0) == MVT::i64) {
SDNode *Op =
CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
@@ -2455,9 +2476,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Offset, Base, Chain };
- return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
- PPCLowering->getPointerTy(),
- MVT::Other, Ops);
+ return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ PPCLowering->getPointerTy(),
+ MVT::Other, Ops));
} else {
unsigned Opcode;
bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
@@ -2466,6 +2488,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
+ case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
case MVT::f64: Opcode = PPC::LFDUX; break;
case MVT::f32: Opcode = PPC::LFSUX; break;
case MVT::i32: Opcode = PPC::LWZUX; break;
@@ -2490,9 +2514,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Base, Offset, Chain };
- return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
- PPCLowering->getPointerTy(),
- MVT::Other, Ops);
+ return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ PPCLowering->getPointerTy(),
+ MVT::Other, Ops));
}
}
@@ -2505,7 +2530,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (isInt32Immediate(N->getOperand(1), Imm) &&
isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
SDValue Val = N->getOperand(0).getOperand(0);
- SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
// If this is just a masked value where the input is not handled above, and
@@ -2514,14 +2540,15 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
isRunOfOnes(Imm, MB, ME) &&
N->getOperand(0).getOpcode() != ISD::ROTL) {
SDValue Val = N->getOperand(0);
- SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
+ SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
// If this is a 64-bit zero-extension mask, emit rldicl.
if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
isMask_64(Imm64)) {
SDValue Val = N->getOperand(0);
- MB = 64 - CountTrailingOnes_64(Imm64);
+ MB = 64 - countTrailingOnes(Imm64);
SH = 0;
// If the operand is a logical right shift, we can fold it into this
@@ -2536,7 +2563,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SH = 64 - Imm;
}
- SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB) };
+ SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
}
// AND X, 0 -> 0, not "rlwinm 32".
@@ -2554,7 +2581,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (isRunOfOnes(Imm, MB, ME)) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
N->getOperand(0).getOperand(1),
- getI32Imm(0), getI32Imm(MB),getI32Imm(ME) };
+ getI32Imm(0, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
}
}
@@ -2595,7 +2623,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
isRotateAndMask(N, Imm, true, SH, MB, ME)) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
- getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ getI32Imm(SH, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
@@ -2607,7 +2636,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
isRotateAndMask(N, Imm, true, SH, MB, ME)) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
- getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ getI32Imm(SH, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
@@ -2627,11 +2657,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
N->getOperand(0),
- CurDAG->getTargetConstant(1, InVT)), 0);
+ CurDAG->getTargetConstant(1, dl, InVT)),
+ 0);
SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
SDValue SRIdxVal =
CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
- PPC::sub_eq : PPC::sub_gt, MVT::i32);
+ PPC::sub_eq : PPC::sub_gt, dl, MVT::i32);
return CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1,
CR0Reg, SRIdxVal,
@@ -2658,7 +2689,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
N->getValueType(0) == MVT::i32) {
SDNode *Tmp =
CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
- N->getOperand(0), getI32Imm(~0U));
+ N->getOperand(0), getI32Imm(~0U, dl));
return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
SDValue(Tmp, 0), N->getOperand(0),
SDValue(Tmp, 1));
@@ -2703,12 +2734,21 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
else if (N->getValueType(0) == MVT::i64)
SelectCCOp = PPC::SELECT_CC_I8;
else if (N->getValueType(0) == MVT::f32)
- SelectCCOp = PPC::SELECT_CC_F4;
+ if (PPCSubTarget->hasP8Vector())
+ SelectCCOp = PPC::SELECT_CC_VSSRC;
+ else
+ SelectCCOp = PPC::SELECT_CC_F4;
else if (N->getValueType(0) == MVT::f64)
if (PPCSubTarget->hasVSX())
SelectCCOp = PPC::SELECT_CC_VSFRC;
else
SelectCCOp = PPC::SELECT_CC_F8;
+ else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
+ SelectCCOp = PPC::SELECT_CC_QFRC;
+ else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
+ SelectCCOp = PPC::SELECT_CC_QSRC;
+ else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
+ SelectCCOp = PPC::SELECT_CC_QBRC;
else if (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)
SelectCCOp = PPC::SELECT_CC_VSRC;
@@ -2716,7 +2756,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SelectCCOp = PPC::SELECT_CC_VRRC;
SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
- getI32Imm(BROpc) };
+ getI32Imm(BROpc, dl) };
return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
}
case ISD::VSELECT:
@@ -2750,7 +2790,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
DM[1] = 1 - tmp;
}
- SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), MVT::i32);
+ SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
+ MVT::i32);
if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
@@ -2789,7 +2830,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// Op #4 is the Flag.
// Prevent PPC::PRED_* from being selected into LI.
SDValue Pred =
- getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+ getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(), dl);
SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
N->getOperand(0), N->getOperand(4) };
return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
@@ -2819,7 +2860,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
}
SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
- SDValue Ops[] = { getI32Imm(PCC), CondCode,
+ SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
N->getOperand(4), N->getOperand(0) };
return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
}
@@ -2838,8 +2879,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
"Only supported for 64-bit ABI and 32-bit SVR4");
if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
SDValue GA = N->getOperand(0);
- return CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
- N->getOperand(1));
+ return transferMemOperands(N, CurDAG->getMachineNode(PPC::LWZtoc, dl,
+ MVT::i32, GA, N->getOperand(1)));
}
// For medium and large code model, we generate two instructions as
@@ -2859,12 +2900,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue GA = N->getOperand(0);
SDValue TOCbase = N->getOperand(1);
SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
- TOCbase, GA);
+ TOCbase, GA);
if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
CModel == CodeModel::Large)
- return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
- SDValue(Tmp, 0));
+ return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
+ MVT::i64, GA, SDValue(Tmp, 0)));
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
const GlobalValue *GValue = G->getGlobal();
@@ -2872,8 +2913,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
(GValue->isDeclaration() || GValue->isWeakForLinker())) ||
GValue->isDeclaration() || GValue->hasCommonLinkage() ||
GValue->hasAvailableExternallyLinkage())
- return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
- SDValue(Tmp, 0));
+ return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
+ MVT::i64, GA, SDValue(Tmp, 0)));
}
return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
@@ -2922,7 +2963,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// Into: tmp = VSPLTIS[BHW] elt
// VADDU[BHW]M tmp, tmp
// Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
- SDValue EltVal = getI32Imm(Elt >> 1);
+ SDValue EltVal = getI32Imm(Elt >> 1, dl);
SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
SDValue TmpVal = SDValue(Tmp, 0);
return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
@@ -2934,9 +2975,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// Into: tmp1 = VSPLTIS[BHW] elt-16
// tmp2 = VSPLTIS[BHW] -16
// VSUBU[BHW]M tmp1, tmp2
- SDValue EltVal = getI32Imm(Elt - 16);
+ SDValue EltVal = getI32Imm(Elt - 16, dl);
SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
- EltVal = getI32Imm(-16);
+ EltVal = getI32Imm(-16, dl);
SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
SDValue(Tmp2, 0));
@@ -2948,9 +2989,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// Into: tmp1 = VSPLTIS[BHW] elt+16
// tmp2 = VSPLTIS[BHW] -16
// VADDU[BHW]M tmp1, tmp2
- SDValue EltVal = getI32Imm(Elt + 16);
+ SDValue EltVal = getI32Imm(Elt + 16, dl);
SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
- EltVal = getI32Imm(-16);
+ EltVal = getI32Imm(-16, dl);
SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
SDValue(Tmp2, 0));
@@ -3159,7 +3200,8 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
if (NonTrivialMask && !Alt) {
// Res = Mask & CMPB
- Res = CurDAG->getNode(ISD::AND, dl, VT, Res, CurDAG->getConstant(Mask, VT));
+ Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
+ CurDAG->getConstant(Mask, dl, VT));
} else if (Alt) {
// Res = (CMPB & Mask) | (~CMPB & Alt)
// Which, as suggested here:
@@ -3168,8 +3210,9 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
// Res = Alt ^ ((Alt ^ Mask) & CMPB)
// useful because the (Alt ^ Mask) can be pre-computed.
Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
- CurDAG->getConstant(Mask ^ Alt, VT));
- Res = CurDAG->getNode(ISD::XOR, dl, VT, Res, CurDAG->getConstant(Alt, VT));
+ CurDAG->getConstant(Mask ^ Alt, dl, VT));
+ Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
+ CurDAG->getConstant(Alt, dl, VT));
}
return Res;
@@ -3201,20 +3244,20 @@ void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
EVT VT = N->getValueType(0);
SDValue Cond = N->getOperand(0);
SDValue ConstTrue =
- CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, VT);
- SDValue ConstFalse = CurDAG->getConstant(0, VT);
+ CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
+ SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
do {
SDNode *User = *N->use_begin();
if (User->getNumOperands() != 2)
break;
- auto TryFold = [this, N, User](SDValue Val) {
+ auto TryFold = [this, N, User, dl](SDValue Val) {
SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
- return CurDAG->FoldConstantArithmetic(User->getOpcode(),
+ return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
User->getValueType(0),
O0.getNode(), O1.getNode());
};
@@ -3404,8 +3447,12 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
+ case PPC::SELECT_QFRC:
+ case PPC::SELECT_QSRC:
+ case PPC::SELECT_QBRC:
case PPC::SELECT_VRRC:
case PPC::SELECT_VSFRC:
+ case PPC::SELECT_VSSRC:
case PPC::SELECT_VSRC: {
SDValue Op = MachineNode->getOperand(0);
if (Op.isMachineOpcode()) {
@@ -3711,8 +3758,12 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
+ case PPC::SELECT_QFRC:
+ case PPC::SELECT_QSRC:
+ case PPC::SELECT_QBRC:
case PPC::SELECT_VRRC:
case PPC::SELECT_VSFRC:
+ case PPC::SELECT_VSSRC:
case PPC::SELECT_VSRC:
if (Op1Set)
ResNode = MachineNode->getOperand(1).getNode();
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e580c811d71f..bb9315e9520e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -39,6 +39,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
+
using namespace llvm;
// FIXME: Remove this once soft-float is supported.
@@ -57,9 +58,9 @@ cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
-PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
- : TargetLowering(TM),
- Subtarget(*TM.getSubtargetImpl()) {
+PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
+ const PPCSubtarget &STI)
+ : TargetLowering(TM), Subtarget(STI) {
// Use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(true);
@@ -88,11 +89,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
if (Subtarget.useCRBits()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
@@ -172,13 +177,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
// If we're enabling GP optimizations, use hardware square root
if (!Subtarget.hasFSQRT() &&
- !(TM.Options.UnsafeFPMath &&
- Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
+ !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
+ Subtarget.hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
if (!Subtarget.hasFSQRT() &&
- !(TM.Options.UnsafeFPMath &&
- Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
+ !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
+ Subtarget.hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
if (Subtarget.hasFCPSGN()) {
@@ -400,6 +405,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD , VT, Legal);
setOperationAction(ISD::SUB , VT, Legal);
+
+ // Vector instructions introduced in P8
+ if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
+ setOperationAction(ISD::CTPOP, VT, Legal);
+ setOperationAction(ISD::CTLZ, VT, Legal);
+ }
+ else {
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ }
// We promote all shuffles to v16i8.
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
@@ -455,8 +470,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
- setOperationAction(ISD::CTPOP, VT, Expand);
- setOperationAction(ISD::CTLZ, VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
@@ -504,7 +517,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
- setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+
+ if (Subtarget.hasP8Altivec())
+ setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+ else
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -557,20 +575,32 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
+ if (Subtarget.hasP8Vector())
+ addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
+
addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
- // VSX v2i64 only supports non-arithmetic operations.
- setOperationAction(ISD::ADD, MVT::v2i64, Expand);
- setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+ if (Subtarget.hasP8Altivec()) {
+ setOperationAction(ISD::SHL, MVT::v2i64, Legal);
+ setOperationAction(ISD::SRA, MVT::v2i64, Legal);
+ setOperationAction(ISD::SRL, MVT::v2i64, Legal);
- setOperationAction(ISD::SHL, MVT::v2i64, Expand);
- setOperationAction(ISD::SRA, MVT::v2i64, Expand);
- setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
+ }
+ else {
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRA, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
- setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+ // VSX v2i64 only supports non-arithmetic operations.
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+ }
setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
@@ -593,6 +623,167 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
}
+
+ if (Subtarget.hasP8Altivec()) {
+ addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
+ }
+ }
+
+ if (Subtarget.hasQPX()) {
+ setOperationAction(ISD::FADD, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FREM, MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
+ setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
+
+ setOperationAction(ISD::LOAD , MVT::v4f64, Custom);
+ setOperationAction(ISD::STORE , MVT::v4f64, Custom);
+
+ setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
+
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
+ setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
+ setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
+ setOperationAction(ISD::FABS , MVT::v4f64, Legal);
+ setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
+ setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
+ setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
+ setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
+ setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
+
+ setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
+
+ addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
+
+ setOperationAction(ISD::FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FREM, MVT::v4f32, Expand);
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
+
+ setOperationAction(ISD::LOAD , MVT::v4f32, Custom);
+ setOperationAction(ISD::STORE , MVT::v4f32, Custom);
+
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
+ setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
+
+ setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
+ setOperationAction(ISD::FABS , MVT::v4f32, Legal);
+ setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
+
+ setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
+
+ setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
+
+ addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
+
+ setOperationAction(ISD::AND , MVT::v4i1, Legal);
+ setOperationAction(ISD::OR , MVT::v4i1, Legal);
+ setOperationAction(ISD::XOR , MVT::v4i1, Legal);
+
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
+
+ setOperationAction(ISD::LOAD , MVT::v4i1, Custom);
+ setOperationAction(ISD::STORE , MVT::v4i1, Custom);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
+
+ addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
+
+ setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
+
+ // These need to set FE_INEXACT, and so cannot be vectorized here.
+ setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
+
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ } else {
+ setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ }
}
if (Subtarget.has64BitSupport())
@@ -606,8 +797,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
}
setBooleanContents(ZeroOrOneBooleanContent);
- // Altivec instructions set fields to all zeros or all ones.
- setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ if (Subtarget.hasAltivec()) {
+ // Altivec instructions set fields to all zeros or all ones.
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+ }
if (!isPPC64) {
// These libcalls are not available in 32-bit.
@@ -672,8 +866,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
// With 32 condition bits, we don't need to sink (and duplicate) compares
// aggressively in CodeGenPrep.
- if (Subtarget.useCRBits())
+ if (Subtarget.useCRBits()) {
setHasMultipleConditionRegisters();
+ setJumpIsExpensive();
+ }
setMinFunctionAlignment(2);
if (Subtarget.isDarwin())
@@ -704,7 +900,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
else
setSchedulingPreference(Sched::Hybrid);
- computeRegisterProperties();
+ computeRegisterProperties(STI.getRegisterInfo());
// The Freescale cores do better with aggressive inlining of memcpy and
// friends. GCC uses same threshold of 128 bytes (= 32 word stores).
@@ -716,6 +912,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
MaxStoresPerMemcpyOptSize = 8;
MaxStoresPerMemmove = 32;
MaxStoresPerMemmoveOptSize = 8;
+ } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
+ // The A2 also benefits from (very) aggressive inlining of memcpy and
+ // friends. The overhead of a the function call, even when warm, can be
+ // over one hundred cycles.
+ MaxStoresPerMemset = 128;
+ MaxStoresPerMemcpy = 128;
+ MaxStoresPerMemmove = 128;
}
}
@@ -763,8 +966,8 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
}
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return nullptr;
+ switch ((PPCISD::NodeType)Opcode) {
+ case PPCISD::FIRST_NUMBER: break;
case PPCISD::FSEL: return "PPCISD::FSEL";
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
@@ -784,17 +987,14 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
- case PPCISD::LOAD: return "PPCISD::LOAD";
- case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
case PPCISD::SRL: return "PPCISD::SRL";
case PPCISD::SRA: return "PPCISD::SRA";
case PPCISD::SHL: return "PPCISD::SHL";
+ case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
- case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS";
- case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
@@ -803,14 +1003,19 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
+ case PPCISD::MFVSR: return "PPCISD::MFVSR";
+ case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
+ case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
+ case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
+ case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
case PPCISD::STBRX: return "PPCISD::STBRX";
case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
- case PPCISD::LARX: return "PPCISD::LARX";
- case PPCISD::STCX: return "PPCISD::STCX";
+ case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
+ case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::BDNZ: return "PPCISD::BDNZ";
case PPCISD::BDZ: return "PPCISD::BDZ";
@@ -819,27 +1024,44 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
case PPCISD::CR6SET: return "PPCISD::CR6SET";
case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
- case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
- case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
- case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
+ case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
+ case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
+ case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
+ case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
+ case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
case PPCISD::SC: return "PPCISD::SC";
+ case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
+ case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
+ case PPCISD::RFEBB: return "PPCISD::RFEBB";
+ case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
+ case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
+ case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
+ case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
+ case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
+ case PPCISD::QBFLT: return "PPCISD::QBFLT";
+ case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
}
+ return nullptr;
}
-EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+EVT PPCTargetLowering::getSetCCResultType(LLVMContext &C, EVT VT) const {
if (!VT.isVector())
return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
+
+ if (Subtarget.hasQPX())
+ return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
+
return VT.changeVectorElementTypeToInteger();
}
@@ -875,11 +1097,11 @@ static bool isConstantOrUndef(int Op, int Val) {
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
-/// inputs (1), and little-endian operantion with two different inputs (2).
+/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
- bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
+ bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
@@ -906,11 +1128,11 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
-/// inputs (1), and little-endian operantion with two different inputs (2).
+/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
- bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
+ bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
@@ -937,6 +1159,56 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
return true;
}
+/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
+/// current subtarget.
+///
+/// The ShuffleKind distinguishes between big-endian operations with
+/// two different inputs (0), either-endian operations with two identical
+/// inputs (1), and little-endian operations with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG) {
+ const PPCSubtarget& Subtarget =
+ static_cast<const PPCSubtarget&>(DAG.getSubtarget());
+ if (!Subtarget.hasP8Vector())
+ return false;
+
+ bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
+ if (ShuffleKind == 0) {
+ if (IsLE)
+ return false;
+ for (unsigned i = 0; i != 16; i += 4)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
+ !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
+ !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
+ return false;
+ } else if (ShuffleKind == 2) {
+ if (!IsLE)
+ return false;
+ for (unsigned i = 0; i != 16; i += 4)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
+ return false;
+ } else if (ShuffleKind == 1) {
+ unsigned j = IsLE ? 0 : 4;
+ for (unsigned i = 0; i != 8; i += 4)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
+ return false;
+ }
+ return true;
+}
+
/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
@@ -965,7 +1237,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
- if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 0, 0);
else if (ShuffleKind == 2) // swapped
@@ -990,7 +1262,7 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
- if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 8, 8);
else if (ShuffleKind == 2) // swapped
@@ -1034,8 +1306,7 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
if (ShiftAmt < i) return -1;
ShiftAmt -= i;
- bool isLE = DAG.getTarget().getSubtargetImpl()->getDataLayout()->
- isLittleEndian();
+ bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian();
if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
// Check the rest of the elements to see if they are consecutive.
@@ -1086,29 +1357,13 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
return true;
}
-/// isAllNegativeZeroVector - Returns true if all elements of build_vector
-/// are -0.0.
-bool PPC::isAllNegativeZeroVector(SDNode *N) {
- BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
-
- APInt APVal, APUndef;
- unsigned BitSize;
- bool HasAnyUndefs;
-
- if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
- return CFP->getValueAPF().isNegZero();
-
- return false;
-}
-
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
- if (DAG.getSubtarget().getDataLayout()->isLittleEndian())
+ if (DAG.getTarget().getDataLayout()->isLittleEndian())
return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
else
return SVOp->getMaskElt(0) / EltSize;
@@ -1161,17 +1416,17 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
// Finally, check the least significant entry.
if (LeadingZero) {
if (!UniquedVals[Multiple-1].getNode())
- return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
+ return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
- if (Val < 16)
- return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
+ if (Val < 16) // 0,0,0,4 -> vspltisw(4)
+ return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
}
if (LeadingOnes) {
if (!UniquedVals[Multiple-1].getNode())
- return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
+ return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
- return DAG.getTargetConstant(Val, MVT::i32);
+ return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
}
return SDValue();
@@ -1202,17 +1457,10 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
// immediate field for would be zero, and we prefer to use vxor for it.
if (ValSizeInBytes < ByteSize) return SDValue();
- // If the element value is larger than the splat value, cut it in half and
- // check to see if the two halves are equal. Continue doing this until we
- // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
- while (ValSizeInBytes > ByteSize) {
- ValSizeInBytes >>= 1;
-
- // If the top half equals the bottom half, we're still ok.
- if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
- (Value & ((1 << (8*ValSizeInBytes))-1)))
- return SDValue();
- }
+ // If the element value is larger than the splat value, check if it consists
+ // of a repeated bit pattern of size ByteSize.
+ if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
+ return SDValue();
// Properly sign extend the value.
int MaskVal = SignExtend32(Value, ByteSize * 8);
@@ -1222,10 +1470,40 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
// Finally, if this value fits in a 5 bit sext field, return it
if (SignExtend32<5>(MaskVal) == MaskVal)
- return DAG.getTargetConstant(MaskVal, MVT::i32);
+ return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
return SDValue();
}
+/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
+/// amount, otherwise return -1.
+int PPC::isQVALIGNIShuffleMask(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
+ return -1;
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i;
+ for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
+ /*search*/;
+
+ if (i == 4) return -1; // all undef.
+
+ // Otherwise, check to see if the rest of the elements are consecutively
+ // numbered from this value.
+ unsigned ShiftAmt = SVOp->getMaskElt(i);
+ if (ShiftAmt < i) return -1;
+ ShiftAmt -= i;
+
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 4; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+ return -1;
+
+ return ShiftAmt;
+}
+
//===----------------------------------------------------------------------===//
// Addressing Mode Selection
//===----------------------------------------------------------------------===//
@@ -1351,7 +1629,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
short imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
(!Aligned || (imm & 3) == 0)) {
- Disp = DAG.getTargetConstant(imm, N.getValueType());
+ Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
@@ -1391,7 +1669,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
} else {
Base = N.getOperand(0);
}
- Disp = DAG.getTargetConstant(imm, N.getValueType());
+ Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
return true;
}
}
@@ -1402,7 +1680,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// this as "d, 0"
short Imm;
if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
- Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
+ Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
@@ -1415,16 +1693,17 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
int Addr = (int)CN->getZExtValue();
// Otherwise, break this down into an LIS + disp.
- Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
+ Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
- Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
+ Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
+ MVT::i32);
unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
return true;
}
}
- Disp = DAG.getTargetConstant(0, getPointerTy());
+ Disp = DAG.getTargetConstant(0, dl, getPointerTy());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
@@ -1485,9 +1764,16 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
} else
return false;
- // PowerPC doesn't have preinc load/store instructions for vectors.
- if (VT.isVector())
- return false;
+ // PowerPC doesn't have preinc load/store instructions for vectors (except
+ // for QPX, which does have preinc r+r forms).
+ if (VT.isVector()) {
+ if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
+ return false;
+ } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
+ AM = ISD::PRE_INC;
+ return true;
+ }
+ }
if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
@@ -1544,8 +1830,9 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
/// GetLabelAccessInfo - Return true if we should reference labels using a
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
-static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
- unsigned &LoOpFlags,
+static bool GetLabelAccessInfo(const TargetMachine &TM,
+ const PPCSubtarget &Subtarget,
+ unsigned &HiOpFlags, unsigned &LoOpFlags,
const GlobalValue *GV = nullptr) {
HiOpFlags = PPCII::MO_HA;
LoOpFlags = PPCII::MO_LO;
@@ -1560,7 +1847,7 @@ static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
// If this is a reference to a global value that requires a non-lazy-ptr, make
// sure that instruction lowering adds it.
- if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
+ if (GV && Subtarget.hasLazyResolverStub(GV)) {
HiOpFlags |= PPCII::MO_NLP_FLAG;
LoOpFlags |= PPCII::MO_NLP_FLAG;
@@ -1575,9 +1862,9 @@ static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
SelectionDAG &DAG) {
- EVT PtrVT = HiPart.getValueType();
- SDValue Zero = DAG.getConstant(0, PtrVT);
SDLoc DL(HiPart);
+ EVT PtrVT = HiPart.getValueType();
+ SDValue Zero = DAG.getConstant(0, DL, PtrVT);
SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
@@ -1592,6 +1879,28 @@ static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}
+static void setUsesTOCBasePtr(MachineFunction &MF) {
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setUsesTOCBasePtr();
+}
+
+static void setUsesTOCBasePtr(SelectionDAG &DAG) {
+ setUsesTOCBasePtr(DAG.getMachineFunction());
+}
+
+static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit,
+ SDValue GA) {
+ EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
+ SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
+ DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
+
+ SDValue Ops[] = { GA, Reg };
+ return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl,
+ DAG.getVTList(VT, MVT::Other), Ops, VT,
+ MachinePointerInfo::getGOT(), 0, false, true,
+ false, 0);
+}
+
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
@@ -1601,20 +1910,19 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
- return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
- DAG.getRegister(PPC::X2, MVT::i64));
+ return getTOCEntry(DAG, SDLoc(CP), true, GA);
}
unsigned MOHiFlag, MOLoFlag;
- bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ bool isPIC =
+ GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
if (isPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
PPCII::MO_PIC_FLAG);
- SDLoc DL(CP);
- return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
- DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+ return getTOCEntry(DAG, SDLoc(CP), false, GA);
}
SDValue CPIHi =
@@ -1631,20 +1939,19 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
- return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
- DAG.getRegister(PPC::X2, MVT::i64));
+ return getTOCEntry(DAG, SDLoc(JT), true, GA);
}
unsigned MOHiFlag, MOLoFlag;
- bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ bool isPIC =
+ GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
if (isPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
PPCII::MO_PIC_FLAG);
- SDLoc DL(GA);
- return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA,
- DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+ return getTOCEntry(DAG, SDLoc(GA), false, GA);
}
SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
@@ -1661,39 +1968,19 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual BlockAddress is stored in the TOC.
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
- return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA,
- DAG.getRegister(PPC::X2, MVT::i64));
+ return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
}
unsigned MOHiFlag, MOLoFlag;
- bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ bool isPIC =
+ GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}
-// Generate a call to __tls_get_addr for the given GOT entry Op.
-std::pair<SDValue,SDValue>
-PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl,
- SelectionDAG &DAG) const {
-
- Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Node = Op;
- Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::C, IntPtrTy,
- DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()),
- std::move(Args), 0);
-
- return LowerCallTo(CLI);
-}
-
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
@@ -1728,6 +2015,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
PPCII::MO_TLS);
SDValue GOTPtr;
if (is64bit) {
+ setUsesTOCBasePtr(DAG);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
PtrVT, GOTReg, TGA);
@@ -1739,10 +2027,10 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
}
if (Model == TLSModel::GeneralDynamic) {
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TLSGD);
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {
+ setUsesTOCBasePtr(DAG);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
GOTReg, TGA);
@@ -1752,17 +2040,15 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
else
GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
}
- SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
- GOTPtr, TGA);
- std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
- return CallResult.first;
+ return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
+ GOTPtr, TGA, TGA);
}
if (Model == TLSModel::LocalDynamic) {
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TLSLD);
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {
+ setUsesTOCBasePtr(DAG);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
GOTReg, TGA);
@@ -1772,13 +2058,10 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
else
GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
}
- SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
- GOTPtr, TGA);
- std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
- SDValue TLSAddr = CallResult.first;
- SDValue Chain = CallResult.second;
- SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
- Chain, TLSAddr, TGA);
+ SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
+ PtrVT, GOTPtr, TGA, TGA);
+ SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
+ PtrVT, TLSAddr, TGA);
return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
}
@@ -1795,20 +2078,20 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
- return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
- DAG.getRegister(PPC::X2, MVT::i64));
+ return getTOCEntry(DAG, DL, true, GA);
}
unsigned MOHiFlag, MOLoFlag;
- bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
+ bool isPIC =
+ GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag, GV);
if (isPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
GSDN->getOffset(),
PPCII::MO_PIC_FLAG);
- return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
- DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32));
+ return getTOCEntry(DAG, DL, false, GA);
}
SDValue GAHi =
@@ -1865,7 +2148,7 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
unsigned Log2b = Log2_32(VT.getSizeInBits());
SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
- DAG.getConstant(Log2b, MVT::i32));
+ DAG.getConstant(Log2b, dl, MVT::i32));
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
}
// Leave comparisons against 0 and -1 alone for now, since they're usually
@@ -1885,7 +2168,7 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
Op.getOperand(1));
- return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
+ return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
}
return SDValue();
}
@@ -1911,11 +2194,11 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
if (VT == MVT::i64) {
// Check if GprIndex is even
SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, dl, MVT::i32));
SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
- DAG.getConstant(0, MVT::i32), ISD::SETNE);
+ DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, dl, MVT::i32));
// Align GprIndex to be even if it isn't
GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
GprIndex);
@@ -1923,7 +2206,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
// fpr index is 1 byte after gpr
SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, dl, MVT::i32));
// fpr
SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
@@ -1932,10 +2215,10 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
InChain = FprIndex.getValue(1);
SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(8, dl, MVT::i32));
SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
- DAG.getConstant(4, MVT::i32));
+ DAG.getConstant(4, dl, MVT::i32));
// areas
SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
@@ -1950,12 +2233,12 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
// select overflow_area if index > 8
SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
- DAG.getConstant(8, MVT::i32), ISD::SETLT);
+ DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
// adjustment constant gpr_index * 4/8
SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
VT.isInteger() ? GprIndex : FprIndex,
- DAG.getConstant(VT.isInteger() ? 4 : 8,
+ DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
MVT::i32));
// OurReg = RegSaveArea + RegConstant
@@ -1965,12 +2248,12 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
// Floating types are 32 bytes into RegSaveArea
if (VT.isFloatingPoint())
OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
- DAG.getConstant(32, MVT::i32));
+ DAG.getConstant(32, dl, MVT::i32));
// increase {f,g}pr_index by 1 (or 2 if VT is i64)
SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
VT.isInteger() ? GprIndex : FprIndex,
- DAG.getConstant(VT == MVT::i64 ? 2 : 1,
+ DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
MVT::i32));
InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
@@ -1984,7 +2267,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
// increase overflow_area by 4/8 if gpr/fpr > 8
SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
DAG.getConstant(VT.isInteger() ? 4 : 8,
- MVT::i32));
+ dl, MVT::i32));
OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
OverflowAreaPlusN);
@@ -2006,8 +2289,8 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
// 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
return DAG.getMemcpy(Op.getOperand(0), Op,
Op.getOperand(1), Op.getOperand(2),
- DAG.getConstant(12, MVT::i32), 8, false, true,
- MachinePointerInfo(), MachinePointerInfo());
+ DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
+ false, MachinePointerInfo(), MachinePointerInfo());
}
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
@@ -2036,7 +2319,7 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
Entry.Node = Trmp; Args.push_back(Entry);
// TrampSize == (isPPC64 ? 48 : 40);
- Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
+ Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
isPPC64 ? MVT::i64 : MVT::i32);
Args.push_back(Entry);
@@ -2097,8 +2380,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
// } va_list[1];
- SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
- SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
+ SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
+ SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -2109,13 +2392,13 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
PtrVT);
uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
- SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
+ SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
- SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
+ SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
uint64_t FPROffset = 1;
- SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
+ SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
@@ -2177,7 +2460,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
};
const unsigned NumArgRegs = array_lengthof(ArgRegs);
- unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
// Skip one register if the first unallocated register has an even register
// number and there are still argument registers available which have not been
@@ -2205,7 +2488,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
const unsigned NumArgRegs = array_lengthof(ArgRegs);
- unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
// If there is only one Floating-point register left we need to put both f64
// values of a split ppc_fp128 value on the stack.
@@ -2220,16 +2503,16 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-/// GetFPR - Get the set of FP registers that should be allocated for arguments,
+/// FPR - The set of FP registers that should be allocated for arguments,
/// on Darwin.
-static const MCPhysReg *GetFPR() {
- static const MCPhysReg FPR[] = {
- PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
- };
+static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
+ PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
+ PPC::F11, PPC::F12, PPC::F13};
- return FPR;
-}
+/// QFPR - The set of QPX registers that should be allocated for arguments.
+static const MCPhysReg QFPR[] = {
+ PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
+ PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
@@ -2257,8 +2540,13 @@ static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
// Altivec parameters are padded to a 16 byte boundary.
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
- ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
+ ArgVT == MVT::v1i128)
Align = 16;
+ // QPX vector types stored in double-precision are padded to a 32 byte
+ // boundary.
+ else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
+ Align = 32;
// ByVal parameters are aligned as requested.
if (Flags.isByVal()) {
@@ -2297,7 +2585,7 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
unsigned ParamAreaSize,
unsigned &ArgOffset,
unsigned &AvailableFPRs,
- unsigned &AvailableVRs) {
+ unsigned &AvailableVRs, bool HasQPX) {
bool UseMemory = false;
// Respect alignment of argument on the stack.
@@ -2321,14 +2609,19 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
// However, if the argument is actually passed in an FPR or a VR,
// we don't use memory after all.
if (!Flags.isByVal()) {
- if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
+ // QPX registers overlap with the scalar FP registers.
+ (HasQPX && (ArgVT == MVT::v4f32 ||
+ ArgVT == MVT::v4f64 ||
+ ArgVT == MVT::v4i1)))
if (AvailableFPRs > 0) {
--AvailableFPRs;
return false;
}
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
- ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
+ ArgVT == MVT::v1i128)
if (AvailableVRs > 0) {
--AvailableVRs;
return false;
@@ -2340,10 +2633,9 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
/// EnsureStackAlignment - Round stack frame size up from NumBytes to
/// ensure minimum alignment required for target.
-static unsigned EnsureStackAlignment(const TargetMachine &Target,
+static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
unsigned NumBytes) {
- unsigned TargetAlign =
- Target.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
+ unsigned TargetAlign = Lowering->getStackAlignment();
unsigned AlignMask = TargetAlign - 1;
NumBytes = (NumBytes + AlignMask) & ~AlignMask;
return NumBytes;
@@ -2424,7 +2716,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
*DAG.getContext());
// Reserve space for the linkage area on the stack.
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false);
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
CCInfo.AllocateStack(LinkageSize, PtrByteSize);
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -2445,7 +2737,10 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
RC = &PPC::GPRCRegClass;
break;
case MVT::f32:
- RC = &PPC::F4RCRegClass;
+ if (Subtarget.hasP8Vector())
+ RC = &PPC::VSSRCRegClass;
+ else
+ RC = &PPC::F4RCRegClass;
break;
case MVT::f64:
if (Subtarget.hasVSX())
@@ -2456,13 +2751,21 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
- case MVT::v4f32:
RC = &PPC::VRRCRegClass;
break;
+ case MVT::v4f32:
+ RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
+ break;
case MVT::v2f64:
case MVT::v2i64:
RC = &PPC::VSHRCRegClass;
break;
+ case MVT::v4f64:
+ RC = &PPC::QFRCRegClass;
+ break;
+ case MVT::v4i1:
+ RC = &PPC::QBRCRegClass;
+ break;
}
// Transform the arguments stored in physical registers into virtual ones.
@@ -2510,7 +2813,8 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// call optimized function's reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ MinReservedArea =
+ EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
FuncInfo->setMinReservedArea(MinReservedArea);
SmallVector<SDValue, 8> MemOps;
@@ -2532,10 +2836,8 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
if (DisablePPCFloatInVariadic)
NumFPArgRegs = 0;
- FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
- NumGPArgRegs));
- FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
- NumFPArgRegs));
+ FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
+ FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
// Make room for NumGPArgRegs and NumFPArgRegs.
int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
@@ -2562,7 +2864,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
- SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
@@ -2581,7 +2883,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by eight for the next argument to store
- SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
+ SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
@@ -2625,22 +2927,20 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
MachineFrameInfo *MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ assert(!(CallConv == CallingConv::Fast && isVarArg) &&
+ "fastcc not supported on varargs functions");
+
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Potential tail calls could cause overwriting of argument stack slots.
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = 8;
-
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
- isELFv2ABI);
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
-
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -2653,6 +2953,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
const unsigned Num_GPR_Regs = array_lengthof(GPR);
const unsigned Num_FPR_Regs = 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
+ const unsigned Num_QFPR_Regs = Num_FPR_Regs;
// Do a first pass over the arguments to determine whether the ABI
// guarantees that our caller has allocated the parameter save area
@@ -2668,7 +2969,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
for (unsigned i = 0, e = Ins.size(); i != e; ++i)
if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
PtrByteSize, LinkageSize, ParamAreaSize,
- NumBytes, AvailableFPRs, AvailableVRs))
+ NumBytes, AvailableFPRs, AvailableVRs,
+ Subtarget.hasQPX()))
HasParameterArea = true;
// Add DAG nodes to load the arguments or copy them out of registers. On
@@ -2676,7 +2978,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// although the first ones are often in registers.
unsigned ArgOffset = LinkageSize;
- unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ unsigned &QFPR_idx = FPR_idx;
SmallVector<SDValue, 8> MemOps;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
@@ -2692,21 +2995,33 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[ArgNo].getOrigArgIndex();
}
- /* Respect alignment of argument on the stack. */
- unsigned Align =
- CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
- ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
- unsigned CurArgOffset = ArgOffset;
+ // We re-align the argument offset for each argument, except when using the
+ // fast calling convention, when we need to make sure we do that only when
+ // we'll actually use a stack slot.
+ unsigned CurArgOffset, Align;
+ auto ComputeArgOffset = [&]() {
+ /* Respect alignment of argument on the stack. */
+ Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ CurArgOffset = ArgOffset;
+ };
+
+ if (CallConv != CallingConv::Fast) {
+ ComputeArgOffset();
- /* Compute GPR index associated with argument offset. */
- GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
- GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
+ /* Compute GPR index associated with argument offset. */
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
+ }
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
if (Flags.isByVal()) {
assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
// ObjSize is the true size, ArgSize rounded up to multiple of registers.
ObjSize = Flags.getByValSize();
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
@@ -2744,13 +3059,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// address of the enclosing doubleword on big-endian systems.
SDValue Arg = FIN;
if (!isLittleEndian) {
- SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT);
+ SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
}
InVals.push_back(Arg);
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store;
@@ -2790,7 +3105,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Addr = FIN;
if (j) {
- SDValue Off = DAG.getConstant(j, PtrVT);
+ SDValue Off = DAG.getConstant(j, dl, PtrVT);
Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
}
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
@@ -2812,7 +3127,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// passed directly. Clang may use those instead of "byval" aggregate
// types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
@@ -2820,10 +3135,14 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
needsLoad = true;
ArgSize = PtrByteSize;
}
- ArgOffset += 8;
+ if (CallConv != CallingConv::Fast || needsLoad)
+ ArgOffset += 8;
break;
case MVT::f32:
@@ -2835,40 +3154,51 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
unsigned VReg;
if (ObjectVT == MVT::f32)
- VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
+ VReg = MF.addLiveIn(FPR[FPR_idx],
+ Subtarget.hasP8Vector()
+ ? &PPC::VSSRCRegClass
+ : &PPC::F4RCRegClass);
else
- VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ?
- &PPC::VSFRCRegClass :
- &PPC::F8RCRegClass);
+ VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
+ ? &PPC::VSFRCRegClass
+ : &PPC::F8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++FPR_idx;
- } else if (GPR_idx != Num_GPR_Regs) {
+ } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
+ // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
+ // once we support fp <-> gpr moves.
+
// This can only ever happen in the presence of f32 array types,
// since otherwise we never run out of FPRs before running out
// of GPRs.
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
if (ObjectVT == MVT::f32) {
if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
- DAG.getConstant(32, MVT::i32));
+ DAG.getConstant(32, dl, MVT::i32));
ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
}
ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
needsLoad = true;
}
// When passing an array of floats, the array occupies consecutive
// space in the argument area; only round up to the next doubleword
// at the end of the array. Otherwise, each float takes 8 bytes.
- ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
- ArgOffset += ArgSize;
- if (Flags.isInConsecutiveRegsLast())
- ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ if (CallConv != CallingConv::Fast || needsLoad) {
+ ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
+ ArgOffset += ArgSize;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ }
break;
case MVT::v4f32:
case MVT::v4i32:
@@ -2876,6 +3206,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
+ case MVT::v1i128:
+ if (!Subtarget.hasQPX()) {
// These can be scalar arguments or elements of a vector array type
// passed directly. The latter are used to implement ELFv2 homogenous
// vector aggregates.
@@ -2886,9 +3218,43 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++VR_idx;
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
+ needsLoad = true;
+ }
+ if (CallConv != CallingConv::Fast || needsLoad)
+ ArgOffset += 16;
+ break;
+ } // not QPX
+
+ assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
+ "Invalid QPX parameter type");
+ /* fall through */
+
+ case MVT::v4f64:
+ case MVT::v4i1:
+ // QPX vectors are treated like their scalar floating-point subregisters
+ // (except that they're larger).
+ unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
+ if (QFPR_idx != Num_QFPR_Regs) {
+ const TargetRegisterClass *RC;
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
+ case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
+ default: RC = &PPC::QBRCRegClass; break;
+ }
+
+ unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++QFPR_idx;
+ } else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
needsLoad = true;
}
- ArgOffset += 16;
+ if (CallConv != CallingConv::Fast || needsLoad)
+ ArgOffset += Sz;
break;
}
@@ -2917,7 +3283,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ MinReservedArea =
+ EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
FuncInfo->setMinReservedArea(MinReservedArea);
// If the function takes variable number of arguments, make a frame index for
@@ -2940,7 +3307,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
- SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
+ SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
}
@@ -2971,9 +3338,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
-
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
- false);
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
unsigned ArgOffset = LinkageSize;
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
@@ -2986,9 +3351,6 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
-
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -3281,7 +3643,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ MinReservedArea =
+ EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
FuncInfo->setMinReservedArea(MinReservedArea);
// If the function takes variable number of arguments, make a frame index for
@@ -3310,7 +3673,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
- SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
}
@@ -3388,7 +3751,7 @@ static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
SignExtend32<26>(Addr) != Addr)
return nullptr; // Top 6 bits have to be sext of immediate.
- return DAG.getConstant((int)C->getZExtValue() >> 2,
+ return DAG.getConstant((int)C->getZExtValue() >> 2, SDLoc(Op),
DAG.getTargetLoweringInfo().getPointerTy()).getNode();
}
@@ -3436,8 +3799,9 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
if (SPDiff) {
// Calculate the new stack slot for the return address.
int SlotSize = isPPC64 ? 8 : 4;
- int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
- isDarwinABI);
+ const PPCFrameLowering *FL =
+ MF.getSubtarget<PPCSubtarget>().getFrameLowering();
+ int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
NewRetAddrLoc, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
@@ -3449,8 +3813,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
// When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
// slot as the FP is never overwritten.
if (isDarwinABI) {
- int NewFPLoc =
- SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
true);
SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
@@ -3520,9 +3883,9 @@ static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
SDLoc dl) {
- SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
- false, false, MachinePointerInfo(),
+ false, false, false, MachinePointerInfo(),
MachinePointerInfo());
}
@@ -3544,7 +3907,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
else
StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
- DAG.getConstant(ArgOffset, PtrVT));
+ DAG.getConstant(ArgOffset, dl, PtrVT));
}
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(), false, false, 0));
@@ -3575,8 +3938,8 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
isPPC64, isDarwinABI, dl);
// Emit callseq_end just before tailcall node.
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag, dl);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
+ DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
InFlag = Chain.getValue(1);
}
@@ -3596,11 +3959,11 @@ static bool isFunctionGlobalAddress(SDValue Callee) {
static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
- SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
- bool IsPatchPoint,
+ SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff,
+ bool isTailCall, bool IsPatchPoint,
SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
- const PPCSubtarget &Subtarget) {
+ ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
bool isPPC64 = Subtarget.isPPC64();
bool isSVR4ABI = Subtarget.isSVR4ABI();
@@ -3701,50 +4064,51 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// 6. On return of the callee, the TOC of the caller needs to be
// restored (this is done in FinishCall()).
//
- // All those operations are flagged together to ensure that no other
+ // The loads are scheduled at the beginning of the call sequence, and the
+ // register copies are flagged together to ensure that no other
// operations can be scheduled in between. E.g. without flagging the
- // operations together, a TOC access in the caller could be scheduled
- // between the load of the callee TOC and the branch to the callee, which
+ // copies together, a TOC access in the caller could be scheduled between
+ // the assignment of the callee TOC and the branch to the callee, which
// results in the TOC access going through the TOC of the callee instead
// of going through the TOC of the caller, which leads to incorrect code.
// Load the address of the function entry point from the function
// descriptor.
- SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
- SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs,
- makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
- Chain = LoadFuncPtr.getValue(1);
- InFlag = LoadFuncPtr.getValue(2);
+ SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
+ if (LDChain.getValueType() == MVT::Glue)
+ LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
- // Load environment pointer into r11.
- // Offset of the environment pointer within the function descriptor.
- SDValue PtrOff = DAG.getIntPtrConstant(16);
+ bool LoadsInv = Subtarget.hasInvariantFunctionDescriptors();
+ MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
+ SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
+ false, false, LoadsInv, 8);
+
+ // Load environment pointer into r11.
+ SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
- SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
- InFlag);
- Chain = LoadEnvPtr.getValue(1);
- InFlag = LoadEnvPtr.getValue(2);
+ SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr,
+ MPI.getWithOffset(16), false, false,
+ LoadsInv, 8);
+
+ SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
+ SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC,
+ MPI.getWithOffset(8), false, false,
+ LoadsInv, 8);
+
+ setUsesTOCBasePtr(DAG);
+ SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
+ InFlag);
+ Chain = TOCVal.getValue(0);
+ InFlag = TOCVal.getValue(1);
SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
InFlag);
+
Chain = EnvVal.getValue(0);
InFlag = EnvVal.getValue(1);
- // Load TOC of the callee into r2. We are using a target-specific load
- // with r2 hard coded, because the result of a target-independent load
- // would never go directly into r2, since r2 is a reserved register (which
- // prevents the register allocator from allocating it), resulting in an
- // additional register being allocated and an unnecessary move instruction
- // being generated.
- VTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue TOCOff = DAG.getIntPtrConstant(8);
- SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
- SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
- AddTOC, InFlag);
- Chain = LoadTOCPtr.getValue(0);
- InFlag = LoadTOCPtr.getValue(1);
-
MTCTROps[0] = Chain;
MTCTROps[1] = LoadFuncPtr;
MTCTROps[2] = InFlag;
@@ -3772,27 +4136,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
if (Callee.getNode()) {
Ops.push_back(Chain);
Ops.push_back(Callee);
-
- // If this is a call to __tls_get_addr, find the symbol whose address
- // is to be taken and add it to the list. This will be used to
- // generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld).
- // We find the symbol by walking the chain to the CopyFromReg, walking
- // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and
- // pulling the symbol from that node.
- if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- if (!strcmp(S->getSymbol(), "__tls_get_addr")) {
- assert(!needIndirectCall && "Indirect call to __tls_get_addr???");
- SDNode *AddI = Chain.getNode()->getOperand(2).getNode();
- SDValue TGTAddr = AddI->getOperand(1);
- assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress &&
- "Didn't find target global TLS address where we expected one");
- Ops.push_back(TGTAddr);
- CallOpc = PPCISD::CALL_TLS;
- }
}
// If this is a tail call add stack pointer delta.
if (isTailCall)
- Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
+ Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
// Add argument registers to the end of the list so that they are known live
// into the call.
@@ -3800,9 +4147,12 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
- // Direct calls in the ELFv2 ABI need the TOC register live into the call.
- if (Callee.getNode() && isELFv2ABI && !IsPatchPoint)
+ // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
+ // into the call.
+ if (isSVR4ABI && isPPC64 && !IsPatchPoint) {
+ setUsesTOCBasePtr(DAG);
Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+ }
return CallOpc;
}
@@ -3869,17 +4219,17 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
SmallVector<std::pair<unsigned, SDValue>, 8>
&RegsToPass,
SDValue InFlag, SDValue Chain,
- SDValue &Callee,
+ SDValue CallSeqStart, SDValue &Callee,
int SPDiff, unsigned NumBytes,
const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<SDValue> &InVals) const {
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const {
- bool isELFv2ABI = Subtarget.isELFv2ABI();
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
- unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
- isTailCall, IsPatchPoint, RegsToPass, Ops,
- NodeTys, Subtarget);
+ unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
+ SPDiff, isTailCall, IsPatchPoint, RegsToPass,
+ Ops, NodeTys, CS, Subtarget);
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
@@ -3893,9 +4243,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3911,6 +4261,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
isa<ConstantSDNode>(Callee)) &&
"Expecting an global address, external symbol, absolute value or register");
+ DAG.getMachineFunction().getFrameInfo()->setHasTailCall();
return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
}
@@ -3939,8 +4290,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
- unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
- SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
+ unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
+ SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
// The address needs to go after the chain input but before the flag (or
@@ -3948,19 +4299,16 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
Ops.insert(std::next(Ops.begin()), AddTOC);
} else if ((CallOpc == PPCISD::CALL) &&
(!isLocalCall(Callee) ||
- DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_))
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
- } else if (CallOpc == PPCISD::CALL_TLS)
- // For 64-bit SVR4, TLS calls are always non-local.
- CallOpc = PPCISD::CALL_NOP_TLS;
}
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(BytesCalleePops, true),
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
+ DAG.getIntPtrConstant(BytesCalleePops, dl, true),
InFlag, dl);
if (!Ins.empty())
InFlag = Chain.getValue(1);
@@ -3983,12 +4331,13 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
bool IsPatchPoint = CLI.IsPatchPoint;
+ ImmutableCallSite *CS = CLI.CS;
if (isTailCall)
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Ins, DAG);
- if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
+ if (!isTailCall && CS && CS->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
@@ -3996,16 +4345,16 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Subtarget.isPPC64())
return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, IsPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals);
+ dl, DAG, InVals, CS);
else
return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, IsPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals);
+ dl, DAG, InVals, CS);
}
return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
isTailCall, IsPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals);
+ dl, DAG, InVals, CS);
}
SDValue
@@ -4016,7 +4365,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const {
// See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
// of the 32-bit SVR4 ABI stack frame layout.
@@ -4046,7 +4396,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
*DAG.getContext());
// Reserve space for the linkage area on the stack.
- CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false),
+ CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
PtrByteSize);
if (isVarArg) {
@@ -4102,7 +4452,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
dl);
SDValue CallSeqStart = Chain;
@@ -4142,7 +4492,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// Memory reserved in the local variable space of the callers stack frame.
unsigned LocMemOffset = ByValVA.getLocMemOffset();
- SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
// Create a copy of the argument in the local area of the current
@@ -4179,7 +4529,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
unsigned LocMemOffset = VA.getLocMemOffset();
if (!isTailCall) {
- SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
@@ -4222,8 +4572,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
false, TailCallArguments);
return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
- RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
- Ins, InVals);
+ RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
+ NumBytes, Ins, InVals, CS);
}
// Copy an argument into memory, being careful to do this outside the
@@ -4254,7 +4604,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const {
bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
@@ -4274,13 +4625,39 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+ assert(!(CallConv == CallingConv::Fast && isVarArg) &&
+ "fastcc not supported on varargs functions");
+
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
// reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
// area is 32 bytes reserved space for [SP][CR][LR][TOC].
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
- isELFv2ABI);
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
unsigned NumBytes = LinkageSize;
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ unsigned &QFPR_idx = FPR_idx;
+
+ static const MCPhysReg GPR[] = {
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const MCPhysReg VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ static const MCPhysReg VSRH[] = {
+ PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+ PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+ };
+
+ const unsigned NumGPRs = array_lengthof(GPR);
+ const unsigned NumFPRs = 13;
+ const unsigned NumVRs = array_lengthof(VR);
+ const unsigned NumQFPRs = NumFPRs;
+
+ // When using the fast calling convention, we don't provide backing for
+ // arguments that will be in registers.
+ unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
// Add up all the space actually used.
for (unsigned i = 0; i != NumOps; ++i) {
@@ -4288,6 +4665,48 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
EVT ArgVT = Outs[i].VT;
EVT OrigVT = Outs[i].ArgVT;
+ if (CallConv == CallingConv::Fast) {
+ if (Flags.isByVal())
+ NumGPRsUsed += (Flags.getByValSize()+7)/8;
+ else
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::i64:
+ if (++NumGPRsUsed <= NumGPRs)
+ continue;
+ break;
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v2i64:
+ case MVT::v1i128:
+ if (++NumVRsUsed <= NumVRs)
+ continue;
+ break;
+ case MVT::v4f32:
+ // When using QPX, this is handled like a FP register, otherwise, it
+ // is an Altivec register.
+ if (Subtarget.hasQPX()) {
+ if (++NumFPRsUsed <= NumFPRs)
+ continue;
+ } else {
+ if (++NumVRsUsed <= NumVRs)
+ continue;
+ }
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v4f64: // QPX
+ case MVT::v4i1: // QPX
+ if (++NumFPRsUsed <= NumFPRs)
+ continue;
+ break;
+ }
+ }
+
/* Respect alignment of argument on the stack. */
unsigned Align =
CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
@@ -4311,7 +4730,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// Tail call needs the stack to be aligned.
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
CallConv == CallingConv::Fast)
- NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
+ NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
@@ -4324,7 +4743,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
dl);
SDValue CallSeqStart = Chain;
@@ -4344,26 +4763,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
unsigned ArgOffset = LinkageSize;
- unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
-
- static const MCPhysReg GPR[] = {
- PPC::X3, PPC::X4, PPC::X5, PPC::X6,
- PPC::X7, PPC::X8, PPC::X9, PPC::X10,
- };
- static const MCPhysReg *FPR = GetFPR();
-
- static const MCPhysReg VR[] = {
- PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
- PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
- };
- static const MCPhysReg VSRH[] = {
- PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
- PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
- };
-
- const unsigned NumGPRs = array_lengthof(GPR);
- const unsigned NumFPRs = 13;
- const unsigned NumVRs = array_lengthof(VR);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
@@ -4375,22 +4774,31 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
EVT ArgVT = Outs[i].VT;
EVT OrigVT = Outs[i].ArgVT;
- /* Respect alignment of argument on the stack. */
- unsigned Align =
- CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
- ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
-
- /* Compute GPR index associated with argument offset. */
- GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
- GPR_idx = std::min(GPR_idx, NumGPRs);
-
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
SDValue PtrOff;
- PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+ // We re-align the argument offset for each argument, except when using the
+ // fast calling convention, when we need to make sure we do that only when
+ // we'll actually use a stack slot.
+ auto ComputePtrOff = [&]() {
+ /* Respect alignment of argument on the stack. */
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ };
+
+ if (CallConv != CallingConv::Fast) {
+ ComputePtrOff();
+
+ /* Compute GPR index associated with argument offset. */
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, NumGPRs);
+ }
// Promote integers to 64-bit values.
if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
@@ -4415,6 +4823,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (Size == 0)
continue;
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
// All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
@@ -4423,7 +4834,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(), VT,
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
ArgOffset += PtrByteSize;
continue;
@@ -4433,7 +4844,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (GPR_idx == NumGPRs && Size < 8) {
SDValue AddPtr = PtrOff;
if (!isLittleEndian) {
- SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
PtrOff.getValueType());
AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
}
@@ -4473,7 +4884,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// parameter save area instead of a new local variable.
SDValue AddPtr = PtrOff;
if (!isLittleEndian) {
- SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
+ SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
}
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
@@ -4485,7 +4896,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
// Done with this argument.
ArgOffset += PtrByteSize;
@@ -4495,7 +4906,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// For aggregates larger than PtrByteSize, copy the pieces of the
// object that fit into registers from the parameter save area.
for (unsigned j=0; j<Size; j+=PtrByteSize) {
- SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
+ SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
@@ -4521,13 +4932,19 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// passed directly. Clang may use those instead of "byval" aggregate
// types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != NumGPRs) {
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
+ if (CallConv == CallingConv::Fast)
+ ArgOffset += PtrByteSize;
}
- ArgOffset += PtrByteSize;
+ if (CallConv != CallingConv::Fast)
+ ArgOffset += PtrByteSize;
break;
case MVT::f32:
case MVT::f64: {
@@ -4541,6 +4958,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// then the parameter save area. For now, put all arguments to vararg
// routines always in both locations (FPR *and* GPR or stack slot).
bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
+ bool NeededLoad = false;
// First load the argument into the next available FPR.
if (FPR_idx != NumFPRs)
@@ -4549,7 +4967,10 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// Next, load the argument into GPR or stack slot if needed.
if (!NeedGPROrStack)
;
- else if (GPR_idx != NumGPRs) {
+ else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
+ // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
+ // once we support fp <-> gpr moves.
+
// In the non-vararg case, this can only ever happen in the
// presence of f32 array types, since otherwise we never run
// out of FPRs before running out of GPRs.
@@ -4580,7 +5001,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
if (!isLittleEndian)
ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
- DAG.getConstant(32, MVT::i32));
+ DAG.getConstant(32, dl, MVT::i32));
// Non-final even elements are skipped; they will be handled
// together the with subsequent argument on the next go-around.
@@ -4588,27 +5009,34 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
ArgVal = SDValue();
if (ArgVal.getNode())
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
// Single-precision floating-point values are mapped to the
// second (rightmost) word of the stack doubleword.
if (Arg.getValueType() == MVT::f32 &&
!isLittleEndian && !Flags.isInConsecutiveRegs()) {
- SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
}
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
+
+ NeededLoad = true;
}
// When passing an array of floats, the array occupies consecutive
// space in the argument area; only round up to the next doubleword
// at the end of the array. Otherwise, each float takes 8 bytes.
- ArgOffset += (Arg.getValueType() == MVT::f32 &&
- Flags.isInConsecutiveRegs()) ? 4 : 8;
- if (Flags.isInConsecutiveRegsLast())
- ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ if (CallConv != CallingConv::Fast || NeededLoad) {
+ ArgOffset += (Arg.getValueType() == MVT::f32 &&
+ Flags.isInConsecutiveRegs()) ? 4 : 8;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ }
break;
}
case MVT::v4f32:
@@ -4617,6 +5045,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
+ case MVT::v1i128:
+ if (!Subtarget.hasQPX()) {
// These can be scalar arguments or elements of a vector array type
// passed directly. The latter are used to implement ELFv2 homogenous
// vector aggregates.
@@ -4649,7 +5079,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (GPR_idx == NumGPRs)
break;
SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
- DAG.getConstant(i, PtrVT));
+ DAG.getConstant(i, dl, PtrVT));
SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
@@ -4667,12 +5097,73 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
RegsToPass.push_back(std::make_pair(VReg, Arg));
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
+ if (CallConv == CallingConv::Fast)
+ ArgOffset += 16;
}
- ArgOffset += 16;
+
+ if (CallConv != CallingConv::Fast)
+ ArgOffset += 16;
break;
+ } // not QPX
+
+ assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
+ "Invalid QPX parameter type");
+
+ /* fall through */
+ case MVT::v4f64:
+ case MVT::v4i1: {
+ bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
+ if (isVarArg) {
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+ if (QFPR_idx != NumQFPRs) {
+ SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl,
+ Store, PtrOff, MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
+ }
+ ArgOffset += (IsF32 ? 16 : 32);
+ for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, dl, PtrVT));
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs QPX params go into registers or on the stack.
+ if (QFPR_idx != NumQFPRs) {
+ RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
+ } else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ true, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ if (CallConv == CallingConv::Fast)
+ ArgOffset += (IsF32 ? 16 : 32);
+ }
+
+ if (CallConv != CallingConv::Fast)
+ ArgOffset += (IsF32 ? 16 : 32);
+ break;
+ }
}
}
@@ -4689,12 +5180,14 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
!isFunctionGlobalAddress(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) {
// Load r2 into a virtual register and store it to the TOC save area.
+ setUsesTOCBasePtr(DAG);
SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
// TOC save area offset.
- unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
- SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
+ unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
- Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
+ Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
+ MachinePointerInfo::getStack(TOCSaveOffset),
false, false, 0);
// In the ELFv2 ABI, R12 must contain the address of an indirect callee.
// This does not mean the MTCTR instruction must use R12; it's easier
@@ -4717,8 +5210,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
FPOp, true, TailCallArguments);
return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
- RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
- Ins, InVals);
+ RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
+ NumBytes, Ins, InVals, CS);
}
SDValue
@@ -4729,7 +5222,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const {
unsigned NumOps = Outs.size();
@@ -4751,8 +5245,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
// prereserved space for [SP][CR][LR][3 x unused].
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
- false);
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
unsigned NumBytes = LinkageSize;
// Add up all the space actually used.
@@ -4797,7 +5290,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// Tail call needs the stack to be aligned.
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
CallConv == CallingConv::Fast)
- NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
+ NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
@@ -4810,7 +5303,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
dl);
SDValue CallSeqStart = Chain;
@@ -4844,8 +5337,6 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -4868,7 +5359,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// register cannot be found for it.
SDValue PtrOff;
- PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+ PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
@@ -4897,7 +5388,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
ArgOffset += PtrByteSize;
} else {
- SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
PtrOff.getValueType());
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
@@ -4918,7 +5409,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// copy the pieces of the object that fit into registers from the
// parameter save area.
for (unsigned j=0; j<Size; j+=PtrByteSize) {
- SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
+ SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
@@ -4971,7 +5462,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
- SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
MachinePointerInfo(),
@@ -5016,7 +5507,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
- DAG.getConstant(ArgOffset, PtrVT));
+ DAG.getConstant(ArgOffset, dl, PtrVT));
SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(), false, false, 0);
MemOpChains.push_back(Store);
@@ -5032,7 +5523,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
if (GPR_idx == NumGPRs)
break;
SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
- DAG.getConstant(i, PtrVT));
+ DAG.getConstant(i, dl, PtrVT));
SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
@@ -5110,8 +5601,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
FPOp, true, TailCallArguments);
return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
- RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
- Ins, InVals);
+ RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
+ NumBytes, Ins, InVals, CS);
}
bool
@@ -5210,7 +5701,6 @@ SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool isPPC64 = Subtarget.isPPC64();
- bool isDarwinABI = Subtarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
@@ -5221,7 +5711,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
// If the frame pointer save index hasn't been defined yet.
if (!RASI) {
// Find out what the fix offset of the frame pointer save area.
- int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+ int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
// Allocate the frame index for frame pointer save area.
RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
// Save the result.
@@ -5234,7 +5724,6 @@ SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool isPPC64 = Subtarget.isPPC64();
- bool isDarwinABI = Subtarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
@@ -5245,9 +5734,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
// If the frame pointer save index hasn't been defined yet.
if (!FPSI) {
// Find out what the fix offset of the frame pointer save area.
- int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
- isDarwinABI);
-
+ int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
// Allocate the frame index for frame pointer save area.
FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
// Save the result.
@@ -5268,7 +5755,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Negate the size.
SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
- DAG.getConstant(0, PtrVT), Size);
+ DAG.getConstant(0, dl, PtrVT), Size);
// Construct a node for the frame pointer save index.
SDValue FPSIdx = getFramePointerFrameIndex(DAG);
// Build a DYNALLOC node.
@@ -5293,6 +5780,9 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
}
SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getValueType().isVector())
+ return LowerVectorLoad(Op, DAG);
+
assert(Op.getValueType() == MVT::i1 &&
"Custom lowering only for i1 loads");
@@ -5314,6 +5804,9 @@ SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
}
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getOperand(1).getValueType().isVector())
+ return LowerVectorStore(Op, DAG);
+
assert(Op.getOperand(1).getValueType() == MVT::i1 &&
"Custom lowering only for i1 stores");
@@ -5453,10 +5946,11 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
switch (Op.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
- Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
- (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ :
- PPCISD::FCTIDZ),
- dl, MVT::f64, Src);
+ Tmp = DAG.getNode(
+ Op.getOpcode() == ISD::FP_TO_SINT
+ ? PPCISD::FCTIWZ
+ : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
+ dl, MVT::f64, Src);
break;
case MVT::i64:
assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
@@ -5491,7 +5985,7 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
// add in a bias.
if (Op.getValueType() == MVT::i32 && !i32Stack) {
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
- DAG.getConstant(4, FIPtr.getValueType()));
+ DAG.getConstant(4, dl, FIPtr.getValueType()));
MPI = MPI.getWithOffset(4);
}
@@ -5500,8 +5994,46 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
RLI.MPI = MPI;
}
+/// \brief Custom lowers floating point to integer conversions to use
+/// the direct move instructions available in ISA 2.07 to avoid the
+/// need for load/store combinations.
+SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
+ SelectionDAG &DAG,
+ SDLoc dl) const {
+ assert(Op.getOperand(0).getValueType().isFloatingPoint());
+ SDValue Src = Op.getOperand(0);
+
+ if (Src.getValueType() == MVT::f32)
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+
+ SDValue Tmp;
+ switch (Op.getSimpleValueType().SimpleTy) {
+ default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
+ case MVT::i32:
+ Tmp = DAG.getNode(
+ Op.getOpcode() == ISD::FP_TO_SINT
+ ? PPCISD::FCTIWZ
+ : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
+ dl, MVT::f64, Src);
+ Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
+ break;
+ case MVT::i64:
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
+ "i64 FP_TO_UINT is supported only with FPCVT");
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ,
+ dl, MVT::f64, Src);
+ Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
+ break;
+ }
+ return Tmp;
+}
+
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
SDLoc dl) const {
+ if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
+ return LowerFP_TO_INTDirectMove(Op, DAG, dl);
+
ReuseLoadInfo RLI;
LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
@@ -5579,30 +6111,92 @@ void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}
+/// \brief Custom lowers integer to floating point conversions to use
+/// the direct move instructions available in ISA 2.07 to avoid the
+/// need for load/store combinations.
+SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
+ SelectionDAG &DAG,
+ SDLoc dl) const {
+ assert((Op.getValueType() == MVT::f32 ||
+ Op.getValueType() == MVT::f64) &&
+ "Invalid floating point type as target of conversion");
+ assert(Subtarget.hasFPCVT() &&
+ "Int to FP conversions with direct moves require FPCVT");
+ SDValue FP;
+ SDValue Src = Op.getOperand(0);
+ bool SinglePrec = Op.getValueType() == MVT::f32;
+ bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
+ bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
+ unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
+ (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
+
+ if (WordInt) {
+ FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
+ dl, MVT::f64, Src);
+ FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
+ }
+ else {
+ FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
+ FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
+ }
+
+ return FP;
+}
+
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
+
+ if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
+ if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
+ return SDValue();
+
+ SDValue Value = Op.getOperand(0);
+ // The values are now known to be -1 (false) or 1 (true). To convert this
+ // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
+ // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
+ Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
+
+ SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
+ FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+ FPHalfs, FPHalfs, FPHalfs, FPHalfs);
+
+ Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+
+ if (Op.getValueType() != MVT::v4f64)
+ Value = DAG.getNode(ISD::FP_ROUND, dl,
+ Op.getValueType(), Value,
+ DAG.getIntPtrConstant(1, dl));
+ return Value;
+ }
+
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
if (Op.getOperand(0).getValueType() == MVT::i1)
return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
- DAG.getConstantFP(1.0, Op.getValueType()),
- DAG.getConstantFP(0.0, Op.getValueType()));
+ DAG.getConstantFP(1.0, dl, Op.getValueType()),
+ DAG.getConstantFP(0.0, dl, Op.getValueType()));
+
+ // If we have direct moves, we can do all the conversion, skip the store/load
+ // however, without FPCVT we can't do most conversions.
+ if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT())
+ return LowerINT_TO_FPDirectMove(Op, DAG, dl);
assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
"UINT_TO_FP is supported only with FPCVT");
// If we have FCFIDS, then use it when converting to single-precision.
// Otherwise, convert to double-precision and then round.
- unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
- (Op.getOpcode() == ISD::UINT_TO_FP ?
- PPCISD::FCFIDUS : PPCISD::FCFIDS) :
- (Op.getOpcode() == ISD::UINT_TO_FP ?
- PPCISD::FCFIDU : PPCISD::FCFID);
- MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
- MVT::f32 : MVT::f64;
+ unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+ ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
+ : PPCISD::FCFIDS)
+ : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
+ : PPCISD::FCFID);
+ MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+ ? MVT::f32
+ : MVT::f64;
if (Op.getOperand(0).getValueType() == MVT::i64) {
SDValue SINT = Op.getOperand(0);
@@ -5627,12 +6221,12 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// bit 12 (value 2048) is set instead, so that the final rounding
// to single-precision gets the correct result.
SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
- SINT, DAG.getConstant(2047, MVT::i64));
+ SINT, DAG.getConstant(2047, dl, MVT::i64));
Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
- Round, DAG.getConstant(2047, MVT::i64));
+ Round, DAG.getConstant(2047, dl, MVT::i64));
Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
Round = DAG.getNode(ISD::AND, dl, MVT::i64,
- Round, DAG.getConstant(-2048, MVT::i64));
+ Round, DAG.getConstant(-2048, dl, MVT::i64));
// However, we cannot use that value unconditionally: if the magnitude
// of the input value is small, the bit-twiddling we did above might
@@ -5643,11 +6237,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// bits are all sign-bit copies, and use the rounded value computed
// above otherwise.
SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
- SINT, DAG.getConstant(53, MVT::i32));
+ SINT, DAG.getConstant(53, dl, MVT::i32));
Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
- Cond, DAG.getConstant(1, MVT::i64));
+ Cond, DAG.getConstant(1, dl, MVT::i64));
Cond = DAG.getSetCC(dl, MVT::i32,
- Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT);
+ Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
}
@@ -5720,7 +6314,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl,
- MVT::f32, FP, DAG.getIntPtrConstant(0));
+ MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
return FP;
}
@@ -5790,7 +6384,8 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// FCFID it and return it.
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
- FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
+ FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
+ DAG.getIntPtrConstant(0, dl));
return FP;
}
@@ -5834,7 +6429,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
StackSlot, MachinePointerInfo(), false, false,0);
// Load FP Control Word from low 32 bits of stack slot.
- SDValue Four = DAG.getConstant(4, PtrVT);
+ SDValue Four = DAG.getConstant(4, dl, PtrVT);
SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
false, false, false, 0);
@@ -5842,14 +6437,14 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// Transform as necessary
SDValue CWD1 =
DAG.getNode(ISD::AND, dl, MVT::i32,
- CWD, DAG.getConstant(3, MVT::i32));
+ CWD, DAG.getConstant(3, dl, MVT::i32));
SDValue CWD2 =
DAG.getNode(ISD::SRL, dl, MVT::i32,
DAG.getNode(ISD::AND, dl, MVT::i32,
DAG.getNode(ISD::XOR, dl, MVT::i32,
- CWD, DAG.getConstant(3, MVT::i32)),
- DAG.getConstant(3, MVT::i32)),
- DAG.getConstant(1, MVT::i32));
+ CWD, DAG.getConstant(3, dl, MVT::i32)),
+ DAG.getConstant(3, dl, MVT::i32)),
+ DAG.getConstant(1, dl, MVT::i32));
SDValue RetVal =
DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
@@ -5874,12 +6469,12 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
EVT AmtVT = Amt.getValueType();
SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
- DAG.getConstant(BitWidth, AmtVT), Amt);
+ DAG.getConstant(BitWidth, dl, AmtVT), Amt);
SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
- DAG.getConstant(-BitWidth, AmtVT));
+ DAG.getConstant(-BitWidth, dl, AmtVT));
SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
@@ -5903,12 +6498,12 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
EVT AmtVT = Amt.getValueType();
SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
- DAG.getConstant(BitWidth, AmtVT), Amt);
+ DAG.getConstant(BitWidth, dl, AmtVT), Amt);
SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
- DAG.getConstant(-BitWidth, AmtVT));
+ DAG.getConstant(-BitWidth, dl, AmtVT));
SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
@@ -5931,15 +6526,15 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
EVT AmtVT = Amt.getValueType();
SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
- DAG.getConstant(BitWidth, AmtVT), Amt);
+ DAG.getConstant(BitWidth, dl, AmtVT), Amt);
SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
- DAG.getConstant(-BitWidth, AmtVT));
+ DAG.getConstant(-BitWidth, dl, AmtVT));
SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
- SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
+ SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
Tmp4, Tmp6, ISD::SETLE);
SDValue OutOps[] = { OutLo, OutHi };
return DAG.getMergeValues(OutOps, dl);
@@ -5955,7 +6550,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
SelectionDAG &DAG, SDLoc dl) {
assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
- static const EVT VTys[] = { // canonical VT to use for each size.
+ static const MVT VTys[] = { // canonical VT to use for each size.
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
};
@@ -5968,7 +6563,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
EVT CanonicalVT = VTys[SplatSize-1];
// Build a canonical splat for this value.
- SDValue Elt = DAG.getConstant(Val, MVT::i32);
+ SDValue Elt = DAG.getConstant(Val, dl, MVT::i32);
SmallVector<SDValue, 8> Ops;
Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
@@ -5982,7 +6577,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = Op.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
- DAG.getConstant(IID, MVT::i32), Op);
+ DAG.getConstant(IID, dl, MVT::i32), Op);
}
/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
@@ -5992,7 +6587,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = LHS.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
- DAG.getConstant(IID, MVT::i32), LHS, RHS);
+ DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
}
/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
@@ -6002,7 +6597,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
SDLoc dl, EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = Op0.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
- DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
+ DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
}
@@ -6032,12 +6627,134 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+ if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
+ // We first build an i32 vector, load it into a QPX register,
+ // then convert it to a floating-point vector and compare it
+ // to a zero vector to get the boolean result.
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+ EVT PtrVT = getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ assert(BVN->getNumOperands() == 4 &&
+ "BUILD_VECTOR for v4i1 does not have 4 operands");
+
+ bool IsConst = true;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
+ IsConst = false;
+ break;
+ }
+ }
+
+ if (IsConst) {
+ Constant *One =
+ ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
+ Constant *NegOne =
+ ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
+
+ SmallVector<Constant*, 4> CV(4, NegOne);
+ for (unsigned i = 0; i < 4; ++i) {
+ if (BVN->getOperand(i).getOpcode() == ISD::UNDEF)
+ CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
+ else if (cast<ConstantSDNode>(BVN->getOperand(i))->
+ getConstantIntValue()->isZero())
+ continue;
+ else
+ CV[i] = One;
+ }
+
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(),
+ 16 /* alignment */);
+
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(DAG.getEntryNode());
+ Ops.push_back(CPIdx);
+
+ SmallVector<EVT, 2> ValueVTs;
+ ValueVTs.push_back(MVT::v4i1);
+ ValueVTs.push_back(MVT::Other); // chain
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ return DAG.getMemIntrinsicNode(PPCISD::QVLFSb,
+ dl, VTs, Ops, MVT::v4f32,
+ MachinePointerInfo::getConstantPool());
+ }
+
+ SmallVector<SDValue, 4> Stores;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ unsigned Offset = 4*i;
+ SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+ unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
+ if (StoreSize > 4) {
+ Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+ BVN->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
+ MVT::i32, false, false, 0));
+ } else {
+ SDValue StoreValue = BVN->getOperand(i);
+ if (StoreSize < 4)
+ StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
+
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
+ StoreValue, Idx,
+ PtrInfo.getWithOffset(Offset),
+ false, false, 0));
+ }
+ }
+
+ SDValue StoreChain;
+ if (!Stores.empty())
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+ else
+ StoreChain = DAG.getEntryNode();
+
+ // Now load from v4i32 into the QPX register; this will extend it to
+ // v4i64 but not yet convert it to a floating point. Nevertheless, this
+ // is typed as v4f64 because the QPX register integer states are not
+ // explicitly represented.
+
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(StoreChain);
+ Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32));
+ Ops.push_back(FIdx);
+
+ SmallVector<EVT, 2> ValueVTs;
+ ValueVTs.push_back(MVT::v4f64);
+ ValueVTs.push_back(MVT::Other); // chain
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
+ dl, VTs, Ops, MVT::v4i32, PtrInfo);
+ LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+ DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
+ LoadedVect);
+
+ SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::f64);
+ FPZeros = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+ FPZeros, FPZeros, FPZeros, FPZeros);
+
+ return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
+ }
+
+ // All other QPX vectors are handled by generic code.
+ if (Subtarget.hasQPX())
+ return SDValue();
+
// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
- HasAnyUndefs, 0, true) || SplatBitSize > 32)
+ HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
+ SplatBitSize > 32)
return SDValue();
unsigned SplatBits = APSplatBits.getZExtValue();
@@ -6050,7 +6767,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (SplatBits == 0) {
// Canonicalize all zero vectors to be v4i32.
if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
- SDValue Z = DAG.getConstant(0, MVT::i32);
+ SDValue Z = DAG.getConstant(0, dl, MVT::i32);
Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
}
@@ -6077,10 +6794,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// To avoid having these optimizations undone by constant folding,
// we convert to a pseudo that will be expanded later into one of
// the above forms.
- SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
+ SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
EVT VT = (SplatSize == 1 ? MVT::v16i8 :
(SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
- SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32);
+ SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
if (VT == Op.getValueType())
return RetVal;
@@ -6104,22 +6821,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
- // The remaining cases assume either big endian element order or
- // a splat-size that equates to the element size of the vector
- // to be built. An example that doesn't work for little endian is
- // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits
- // and a vector element size of 16 bits. The code below will
- // produce the vector in big endian element order, which for little
- // endian is {-1, 0, -1, 0, -1, 0, -1, 0}.
-
- // For now, just avoid these optimizations in that case.
- // FIXME: Develop correct optimizations for LE with mismatched
- // splat and element sizes.
-
- if (Subtarget.isLittleEndian() &&
- SplatSize != Op.getValueType().getVectorElementType().getSizeInBits())
- return SDValue();
-
// Check to see if this is a wide variety of vsplti*, binop self cases.
static const signed char SplatCsts[] = {
-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
@@ -6290,6 +6991,45 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
EVT VT = Op.getValueType();
bool isLittleEndian = Subtarget.isLittleEndian();
+ if (Subtarget.hasQPX()) {
+ if (VT.getVectorNumElements() != 4)
+ return SDValue();
+
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
+ if (AlignIdx != -1) {
+ return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
+ DAG.getConstant(AlignIdx, dl, MVT::i32));
+ } else if (SVOp->isSplat()) {
+ int SplatIdx = SVOp->getSplatIndex();
+ if (SplatIdx >= 4) {
+ std::swap(V1, V2);
+ SplatIdx -= 4;
+ }
+
+ // FIXME: If SplatIdx == 0 and the input came from a load, then there is
+ // nothing to do.
+
+ return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
+ DAG.getConstant(SplatIdx, dl, MVT::i32));
+ }
+
+ // Lower this into a qvgpci/qvfperm pair.
+
+ // Compute the qvgpci literal
+ unsigned idx = 0;
+ for (unsigned i = 0; i < 4; ++i) {
+ int m = SVOp->getMaskElt(i);
+ unsigned mm = m >= 0 ? (unsigned) m : i;
+ idx |= mm << (3-i)*3;
+ }
+
+ SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
+ DAG.getConstant(idx, dl, MVT::i32));
+ return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
+ }
+
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
// selected by the instruction selector.
@@ -6299,6 +7039,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
PPC::isSplatShuffleMask(SVOp, 4) ||
PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
+ PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
@@ -6316,6 +7057,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+ PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
@@ -6401,10 +7143,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
for (unsigned j = 0; j != BytesPerElement; ++j)
if (isLittleEndian)
- ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j),
- MVT::i32));
+ ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
+ dl, MVT::i32));
else
- ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
MVT::i32));
}
@@ -6422,7 +7164,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
/// altivec comparison. If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
- bool &isDot) {
+ bool &isDot, const PPCSubtarget &Subtarget) {
unsigned IntrinsicID =
cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
CompareOpc = -1;
@@ -6435,29 +7177,83 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequd_p:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 199;
+ isDot = 1;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsd_p:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 967;
+ isDot = 1;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtud_p:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 711;
+ isDot = 1;
+ }
+ else
+ return false;
+ break;
+
// Normal Comparisons.
case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequd:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 199;
+ isDot = 0;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsd:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 967;
+ isDot = 0;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtud:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 711;
+ isDot = 0;
+ }
+ else
+ return false;
+
+ break;
}
return true;
}
@@ -6471,14 +7267,14 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc dl(Op);
int CompareOpc;
bool isDot;
- if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
+ if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget))
return SDValue(); // Don't custom lower most intrinsics.
// If this is a non-dot comparison, make the VCMP node and we are done.
if (!isDot) {
SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
Op.getOperand(1), Op.getOperand(2),
- DAG.getConstant(CompareOpc, MVT::i32));
+ DAG.getConstant(CompareOpc, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
}
@@ -6486,7 +7282,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue Ops[] = {
Op.getOperand(2), // LHS
Op.getOperand(3), // RHS
- DAG.getConstant(CompareOpc, MVT::i32)
+ DAG.getConstant(CompareOpc, dl, MVT::i32)
};
EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
@@ -6518,15 +7314,15 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// Shift the bit into the low position.
Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
- DAG.getConstant(8-(3-BitNo), MVT::i32));
+ DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
// Isolate the bit.
Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, dl, MVT::i32));
// If we are supposed to, toggle the bit.
if (InvertBit)
Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, dl, MVT::i32));
return Flags;
}
@@ -6572,6 +7368,304 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
false, false, false, 0);
}
+SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDNode *N = Op.getNode();
+
+ assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
+ "Unknown extract_vector_elt type");
+
+ SDValue Value = N->getOperand(0);
+
+ // The first part of this is like the store lowering except that we don't
+ // need to track the chain.
+
+ // The values are now known to be -1 (false) or 1 (true). To convert this
+ // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
+ // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
+ Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
+
+ // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
+ // understand how to form the extending load.
+ SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
+ FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+ FPHalfs, FPHalfs, FPHalfs, FPHalfs);
+
+ Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+
+ // Now convert to an integer and store.
+ Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+ DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
+ Value);
+
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+ EVT PtrVT = getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue StoreChain = DAG.getEntryNode();
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(StoreChain);
+ Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32));
+ Ops.push_back(Value);
+ Ops.push_back(FIdx);
+
+ SmallVector<EVT, 2> ValueVTs;
+ ValueVTs.push_back(MVT::Other); // chain
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
+ dl, VTs, Ops, MVT::v4i32, PtrInfo);
+
+ // Extract the value requested.
+ unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+ SDValue IntVal = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
+ PtrInfo.getWithOffset(Offset),
+ false, false, false, 0);
+
+ if (!Subtarget.useCRBits())
+ return IntVal;
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
+}
+
+/// Lowering for QPX v4i1 loads
+SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
+ SDValue LoadChain = LN->getChain();
+ SDValue BasePtr = LN->getBasePtr();
+
+ if (Op.getValueType() == MVT::v4f64 ||
+ Op.getValueType() == MVT::v4f32) {
+ EVT MemVT = LN->getMemoryVT();
+ unsigned Alignment = LN->getAlignment();
+
+ // If this load is properly aligned, then it is legal.
+ if (Alignment >= MemVT.getStoreSize())
+ return Op;
+
+ EVT ScalarVT = Op.getValueType().getScalarType(),
+ ScalarMemVT = MemVT.getScalarType();
+ unsigned Stride = ScalarMemVT.getStoreSize();
+
+ SmallVector<SDValue, 8> Vals, LoadChains;
+ for (unsigned Idx = 0; Idx < 4; ++Idx) {
+ SDValue Load;
+ if (ScalarVT != ScalarMemVT)
+ Load =
+ DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
+ BasePtr,
+ LN->getPointerInfo().getWithOffset(Idx*Stride),
+ ScalarMemVT, LN->isVolatile(), LN->isNonTemporal(),
+ LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
+ LN->getAAInfo());
+ else
+ Load =
+ DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
+ LN->getPointerInfo().getWithOffset(Idx*Stride),
+ LN->isVolatile(), LN->isNonTemporal(),
+ LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
+ LN->getAAInfo());
+
+ if (Idx == 0 && LN->isIndexed()) {
+ assert(LN->getAddressingMode() == ISD::PRE_INC &&
+ "Unknown addressing mode on vector load");
+ Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
+ LN->getAddressingMode());
+ }
+
+ Vals.push_back(Load);
+ LoadChains.push_back(Load.getValue(1));
+
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Stride, dl,
+ BasePtr.getValueType()));
+ }
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+ SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Op.getValueType(), Vals);
+
+ if (LN->isIndexed()) {
+ SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
+ return DAG.getMergeValues(RetOps, dl);
+ }
+
+ SDValue RetOps[] = { Value, TF };
+ return DAG.getMergeValues(RetOps, dl);
+ }
+
+ assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
+ assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
+
+ // To lower v4i1 from a byte array, we load the byte elements of the
+ // vector and then reuse the BUILD_VECTOR logic.
+
+ SmallVector<SDValue, 4> VectElmts, VectElmtChains;
+ for (unsigned i = 0; i < 4; ++i) {
+ SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
+
+ VectElmts.push_back(DAG.getExtLoad(ISD::EXTLOAD,
+ dl, MVT::i32, LoadChain, Idx,
+ LN->getPointerInfo().getWithOffset(i),
+ MVT::i8 /* memory type */,
+ LN->isVolatile(), LN->isNonTemporal(),
+ LN->isInvariant(),
+ 1 /* alignment */, LN->getAAInfo()));
+ VectElmtChains.push_back(VectElmts[i].getValue(1));
+ }
+
+ LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
+ SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i1, VectElmts);
+
+ SDValue RVals[] = { Value, LoadChain };
+ return DAG.getMergeValues(RVals, dl);
+}
+
+/// Lowering for QPX v4i1 stores
+SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
+ SDValue StoreChain = SN->getChain();
+ SDValue BasePtr = SN->getBasePtr();
+ SDValue Value = SN->getValue();
+
+ if (Value.getValueType() == MVT::v4f64 ||
+ Value.getValueType() == MVT::v4f32) {
+ EVT MemVT = SN->getMemoryVT();
+ unsigned Alignment = SN->getAlignment();
+
+ // If this store is properly aligned, then it is legal.
+ if (Alignment >= MemVT.getStoreSize())
+ return Op;
+
+ EVT ScalarVT = Value.getValueType().getScalarType(),
+ ScalarMemVT = MemVT.getScalarType();
+ unsigned Stride = ScalarMemVT.getStoreSize();
+
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx = 0; Idx < 4; ++Idx) {
+ SDValue Ex =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
+ DAG.getConstant(Idx, dl, getVectorIdxTy()));
+ SDValue Store;
+ if (ScalarVT != ScalarMemVT)
+ Store =
+ DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
+ SN->getPointerInfo().getWithOffset(Idx*Stride),
+ ScalarMemVT, SN->isVolatile(), SN->isNonTemporal(),
+ MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
+ else
+ Store =
+ DAG.getStore(StoreChain, dl, Ex, BasePtr,
+ SN->getPointerInfo().getWithOffset(Idx*Stride),
+ SN->isVolatile(), SN->isNonTemporal(),
+ MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
+
+ if (Idx == 0 && SN->isIndexed()) {
+ assert(SN->getAddressingMode() == ISD::PRE_INC &&
+ "Unknown addressing mode on vector store");
+ Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
+ SN->getAddressingMode());
+ }
+
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Stride, dl,
+ BasePtr.getValueType()));
+ Stores.push_back(Store);
+ }
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+ if (SN->isIndexed()) {
+ SDValue RetOps[] = { TF, Stores[0].getValue(1) };
+ return DAG.getMergeValues(RetOps, dl);
+ }
+
+ return TF;
+ }
+
+ assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
+ assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
+
+ // The values are now known to be -1 (false) or 1 (true). To convert this
+ // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
+ // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
+ Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
+
+ // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
+ // understand how to form the extending load.
+ SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
+ FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+ FPHalfs, FPHalfs, FPHalfs, FPHalfs);
+
+ Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+
+ // Now convert to an integer and store.
+ Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+ DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
+ Value);
+
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+ EVT PtrVT = getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(StoreChain);
+ Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32));
+ Ops.push_back(Value);
+ Ops.push_back(FIdx);
+
+ SmallVector<EVT, 2> ValueVTs;
+ ValueVTs.push_back(MVT::Other); // chain
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
+ dl, VTs, Ops, MVT::v4i32, PtrInfo);
+
+ // Move data into the byte array.
+ SmallVector<SDValue, 4> Loads, LoadChains;
+ for (unsigned i = 0; i < 4; ++i) {
+ unsigned Offset = 4*i;
+ SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+ Loads.push_back(DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
+ PtrInfo.getWithOffset(Offset),
+ false, false, false, 0));
+ LoadChains.push_back(Loads[i].getValue(1));
+ }
+
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+
+ SmallVector<SDValue, 4> Stores;
+ for (unsigned i = 0; i < 4; ++i) {
+ SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
+
+ Stores.push_back(DAG.getTruncStore(StoreChain, dl, Loads[i], Idx,
+ SN->getPointerInfo().getWithOffset(i),
+ MVT::i8 /* memory type */,
+ SN->isNonTemporal(), SN->isVolatile(),
+ 1 /* alignment */, SN->getAAInfo()));
+ }
+
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+ return StoreChain;
+}
+
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
if (Op.getValueType() == MVT::v4i32) {
@@ -6694,6 +7788,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
// For counter-based loop handling.
@@ -6708,7 +7803,6 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
- const TargetMachine &TM = getTargetMachine();
SDLoc dl(N);
switch (N->getOpcode()) {
default:
@@ -6739,8 +7833,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
break;
}
case ISD::VAARG: {
- if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
- || TM.getSubtarget<PPCSubtarget>().isPPC64())
+ if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
return;
EVT VT = N->getValueType(0);
@@ -6758,10 +7851,10 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
assert(N->getOperand(0).getValueType() == MVT::ppcf128);
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
MVT::f64, N->getOperand(0),
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
MVT::f64, N->getOperand(0),
- DAG.getIntPtrConstant(1));
+ DAG.getIntPtrConstant(1, dl));
// Add the two halves of the long double in round-to-zero mode.
SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
@@ -6773,6 +7866,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
// LowerFP_TO_INT() can only handle f32 and f64.
if (N->getOperand(0).getValueType() == MVT::ppcf128)
return;
@@ -6789,7 +7883,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Function *Func = Intrinsic::getDeclaration(M, Id);
- return Builder.CreateCall(Func);
+ return Builder.CreateCall(Func, {});
}
// The mappings for emitLeading/TrailingFence is taken from
@@ -6799,10 +7893,9 @@ Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
bool IsLoad) const {
if (Ord == SequentiallyConsistent)
return callIntrinsic(Builder, Intrinsic::ppc_sync);
- else if (isAtLeastRelease(Ord))
+ if (isAtLeastRelease(Ord))
return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
- else
- return nullptr;
+ return nullptr;
}
Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
@@ -6814,16 +7907,40 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
// See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
// and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
- else
- return nullptr;
+ return nullptr;
}
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
- bool is64bit, unsigned BinOpcode) const {
+ unsigned AtomicSize,
+ unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+
+ auto LoadMnemonic = PPC::LDARX;
+ auto StoreMnemonic = PPC::STDCX;
+ switch (AtomicSize) {
+ default:
+ llvm_unreachable("Unexpected size of atomic entity");
+ case 1:
+ LoadMnemonic = PPC::LBARX;
+ StoreMnemonic = PPC::STBCX;
+ assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
+ break;
+ case 2:
+ LoadMnemonic = PPC::LHARX;
+ StoreMnemonic = PPC::STHCX;
+ assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
+ break;
+ case 4:
+ LoadMnemonic = PPC::LWARX;
+ StoreMnemonic = PPC::STWCX;
+ break;
+ case 8:
+ LoadMnemonic = PPC::LDARX;
+ StoreMnemonic = PPC::STDCX;
+ break;
+ }
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
@@ -6846,7 +7963,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineRegisterInfo &RegInfo = F->getRegInfo();
unsigned TmpReg = (!BinOpcode) ? incr :
- RegInfo.createVirtualRegister( is64bit ? &PPC::G8RCRegClass
+ RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
: &PPC::GPRCRegClass);
// thisMBB:
@@ -6861,11 +7978,11 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// bne- loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
.addReg(ptrA).addReg(ptrB);
if (BinOpcode)
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(TmpReg).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
@@ -6883,9 +8000,12 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *BB,
bool is8bit, // operation
unsigned BinOpcode) const {
+ // If we support part-word atomic mnemonics, just use them
+ if (Subtarget.hasPartwordAtomics())
+ return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode);
+
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// In 64 bit mode we have to use 64 bits for addresses, even though the
// lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
// registers without caring whether they're 32 or 64, but here we're
@@ -7012,8 +8132,7 @@ llvm::MachineBasicBlock*
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -7086,6 +8205,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
unsigned BufReg = MI->getOperand(1).getReg();
if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
+ setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
.addReg(PPC::X2)
.addImm(TOCOffset)
@@ -7096,23 +8216,21 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// Naked functions never have a base pointer, and so we use r1. For all
// other functions, this decision must be delayed until during PEI.
unsigned BaseReg;
- if (MF->getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::Naked))
+ if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
else
BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
MIB = BuildMI(*thisMBB, MI, DL,
TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
- .addReg(BaseReg)
- .addImm(BPOffset)
- .addReg(BufReg);
+ .addReg(BaseReg)
+ .addImm(BPOffset)
+ .addReg(BufReg);
MIB.setMemRefs(MMOBegin, MMOEnd);
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
- const PPCRegisterInfo *TRI =
- getTargetMachine().getSubtarget<PPCSubtarget>().getRegisterInfo();
+ const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
MIB.addRegMask(TRI->getNoPreservedMask());
BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
@@ -7126,8 +8244,9 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// mainMBB:
// mainDstReg = 0
- MIB = BuildMI(mainMBB, DL,
- TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+ MIB =
+ BuildMI(mainMBB, DL,
+ TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
// Store IP
if (Subtarget.isPPC64()) {
@@ -7161,8 +8280,7 @@ MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -7181,10 +8299,13 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Since FP is only updated here but NOT referenced, it's treated as GPR.
unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
- unsigned BP = (PVT == MVT::i64) ? PPC::X30 :
- (Subtarget.isSVR4ABI() &&
- MF->getTarget().getRelocationModel() == Reloc::PIC_ ?
- PPC::R29 : PPC::R30);
+ unsigned BP =
+ (PVT == MVT::i64)
+ ? PPC::X30
+ : (Subtarget.isSVR4ABI() &&
+ MF->getTarget().getRelocationModel() == Reloc::PIC_
+ ? PPC::R29
+ : PPC::R30);
MachineInstrBuilder MIB;
@@ -7247,6 +8368,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Reload TOC
if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
+ setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
.addImm(TOCOffset)
.addReg(BufReg);
@@ -7267,8 +8389,20 @@ MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
if (MI->getOpcode() == TargetOpcode::STACKMAP ||
- MI->getOpcode() == TargetOpcode::PATCHPOINT)
+ MI->getOpcode() == TargetOpcode::PATCHPOINT) {
+ if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
+ MI->getOpcode() == TargetOpcode::PATCHPOINT) {
+ // Call lowering should have added an r2 operand to indicate a dependence
+ // on the TOC base pointer value. It can't however, because there is no
+ // way to mark the dependence as implicit there, and so the stackmap code
+ // will confuse it with a regular operand. Instead, add the dependence
+ // here.
+ setUsesTOCBasePtr(*BB->getParent());
+ MI->addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
+ }
+
return emitPatchPoint(MI, BB);
+ }
if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
@@ -7278,8 +8412,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return emitEHSjLjLongJmp(MI, BB);
}
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// To "insert" these instructions we actually have to insert their
// control-flow patterns.
@@ -7290,9 +8423,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
- MI->getOpcode() == PPC::SELECT_CC_I8 ||
- MI->getOpcode() == PPC::SELECT_I4 ||
- MI->getOpcode() == PPC::SELECT_I8)) {
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8)) {
SmallVector<MachineOperand, 2> Cond;
if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8)
@@ -7302,8 +8435,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
Cond.push_back(MI->getOperand(1));
DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
Cond, MI->getOperand(2).getReg(),
MI->getOperand(3).getReg());
@@ -7311,15 +8442,23 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_CC_F4 ||
MI->getOpcode() == PPC::SELECT_CC_F8 ||
+ MI->getOpcode() == PPC::SELECT_CC_QFRC ||
+ MI->getOpcode() == PPC::SELECT_CC_QSRC ||
+ MI->getOpcode() == PPC::SELECT_CC_QBRC ||
MI->getOpcode() == PPC::SELECT_CC_VRRC ||
MI->getOpcode() == PPC::SELECT_CC_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_CC_VSSRC ||
MI->getOpcode() == PPC::SELECT_CC_VSRC ||
MI->getOpcode() == PPC::SELECT_I4 ||
MI->getOpcode() == PPC::SELECT_I8 ||
MI->getOpcode() == PPC::SELECT_F4 ||
MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_QFRC ||
+ MI->getOpcode() == PPC::SELECT_QSRC ||
+ MI->getOpcode() == PPC::SELECT_QBRC ||
MI->getOpcode() == PPC::SELECT_VRRC ||
MI->getOpcode() == PPC::SELECT_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_VSSRC ||
MI->getOpcode() == PPC::SELECT_VSRC) {
// The incoming instruction knows the destination vreg to set, the
// condition code register to branch on, the true/false values to
@@ -7351,8 +8490,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->getOpcode() == PPC::SELECT_I8 ||
MI->getOpcode() == PPC::SELECT_F4 ||
MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_QFRC ||
+ MI->getOpcode() == PPC::SELECT_QSRC ||
+ MI->getOpcode() == PPC::SELECT_QBRC ||
MI->getOpcode() == PPC::SELECT_VRRC ||
MI->getOpcode() == PPC::SELECT_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_VSSRC ||
MI->getOpcode() == PPC::SELECT_VSRC) {
BuildMI(BB, dl, TII->get(PPC::BC))
.addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
@@ -7429,68 +8572,96 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
- BB = EmitAtomicBinary(MI, BB, false, 0);
+ BB = EmitAtomicBinary(MI, BB, 4, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
- BB = EmitAtomicBinary(MI, BB, true, 0);
+ BB = EmitAtomicBinary(MI, BB, 8, 0);
else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
- MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
+ (Subtarget.hasPartwordAtomics() &&
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
+ (Subtarget.hasPartwordAtomics() &&
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
+ auto LoadMnemonic = PPC::LDARX;
+ auto StoreMnemonic = PPC::STDCX;
+ switch(MI->getOpcode()) {
+ default:
+ llvm_unreachable("Compare and swap of unknown size");
+ case PPC::ATOMIC_CMP_SWAP_I8:
+ LoadMnemonic = PPC::LBARX;
+ StoreMnemonic = PPC::STBCX;
+ assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I16:
+ LoadMnemonic = PPC::LHARX;
+ StoreMnemonic = PPC::STHCX;
+ assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I32:
+ LoadMnemonic = PPC::LWARX;
+ StoreMnemonic = PPC::STWCX;
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I64:
+ LoadMnemonic = PPC::LDARX;
+ StoreMnemonic = PPC::STDCX;
+ break;
+ }
unsigned dest = MI->getOperand(0).getReg();
unsigned ptrA = MI->getOperand(1).getReg();
unsigned ptrB = MI->getOperand(2).getReg();
@@ -7516,18 +8687,18 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(loop1MBB);
// loop1MBB:
- // l[wd]arx dest, ptr
+ // l[bhwd]arx dest, ptr
// cmp[wd] dest, oldval
// bne- midMBB
// loop2MBB:
- // st[wd]cx. newval, ptr
+ // st[bhwd]cx. newval, ptr
// bne- loopMBB
// b exitBB
// midMBB:
- // st[wd]cx. dest, ptr
+ // st[bhwd]cx. dest, ptr
// exitBB:
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
.addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
.addReg(oldval).addReg(dest);
@@ -7537,7 +8708,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(midMBB);
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(newval).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
@@ -7546,7 +8717,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(exitMBB);
BB = midMBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(dest).addReg(ptrA).addReg(ptrB);
BB->addSuccessor(exitMBB);
@@ -7724,7 +8895,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
// Restore FPSCR value.
- BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
} else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
MI->getOpcode() == PPC::ANDIo_1_GT_BIT ||
MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
@@ -7746,6 +8917,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
MI->getOperand(0).getReg())
.addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
+ } else if (MI->getOpcode() == PPC::TCHECK_RET) {
+ DebugLoc Dl = MI->getDebugLoc();
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+ BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
+ return BB;
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -7764,9 +8941,11 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
bool &UseOneConstNR) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
- (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
+ (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
- (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+ (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
+ (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
+ (VT == MVT::v4f64 && Subtarget.hasQPX())) {
// Convergence is quadratic, so we essentially double the number of digits
// correct after every iteration. For both FRE and FRSQRTE, the minimum
// architected relative accuracy is 2^-5. When hasRecipPrec(), this is
@@ -7785,9 +8964,11 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
unsigned &RefinementSteps) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
- (VT == MVT::f64 && Subtarget.hasFRE()) ||
+ (VT == MVT::f64 && Subtarget.hasFRE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
- (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+ (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
+ (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
+ (VT == MVT::v4f64 && Subtarget.hasQPX())) {
// Convergence is quadratic, so we essentially double the number of digits
// correct after every iteration. For both FRE and FRSQRTE, the minimum
// architected relative accuracy is 2^-5. When hasRecipPrec(), this is
@@ -7873,6 +9054,24 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
EVT VT;
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default: return false;
+ case Intrinsic::ppc_qpx_qvlfd:
+ case Intrinsic::ppc_qpx_qvlfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfs:
+ case Intrinsic::ppc_qpx_qvlfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcd:
+ case Intrinsic::ppc_qpx_qvlfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcs:
+ case Intrinsic::ppc_qpx_qvlfcsa:
+ VT = MVT::v2f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfiwa:
+ case Intrinsic::ppc_qpx_qvlfiwz:
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_vsx_lxvw4x:
@@ -7899,6 +9098,24 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
EVT VT;
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default: return false;
+ case Intrinsic::ppc_qpx_qvstfd:
+ case Intrinsic::ppc_qpx_qvstfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfs:
+ case Intrinsic::ppc_qpx_qvstfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcd:
+ case Intrinsic::ppc_qpx_qvstfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcs:
+ case Intrinsic::ppc_qpx_qvstfcsa:
+ VT = MVT::v2f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfiw:
+ case Intrinsic::ppc_qpx_qvstfiwa:
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
case Intrinsic::ppc_vsx_stxvw4x:
@@ -7997,8 +9214,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
- assert(Subtarget.useCRBits() &&
- "Expecting to be tracking CR bits");
+ assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
// If we're tracking CR bits, we need to be careful that we don't have:
// trunc(binary-ops(zext(x), zext(y)))
// or
@@ -8294,10 +9510,8 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
N->getValueType(0) != MVT::i64)
return SDValue();
- if (!((N->getOperand(0).getValueType() == MVT::i1 &&
- Subtarget.useCRBits()) ||
- (N->getOperand(0).getValueType() == MVT::i32 &&
- Subtarget.isPPC64())))
+ if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
+ (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
return SDValue();
if (N->getOperand(0).getOpcode() != ISD::AND &&
@@ -8539,13 +9753,13 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
DAG.getConstant(APInt::getLowBitsSet(
N->getValueSizeInBits(0), PromBits),
- N->getValueType(0)));
+ dl, N->getValueType(0)));
assert(N->getOpcode() == ISD::SIGN_EXTEND &&
"Invalid extension type");
EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
SDValue ShiftCst =
- DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy);
+ DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
DAG.getNode(ISD::SHL, dl, N->getValueType(0),
N->getOperand(0), ShiftCst), ShiftCst);
@@ -8582,13 +9796,14 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
// If we have FCFIDS, then use it when converting to single-precision.
// Otherwise, convert to double-precision and then round.
- unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
- (Op.getOpcode() == ISD::UINT_TO_FP ?
- PPCISD::FCFIDUS : PPCISD::FCFIDS) :
- (Op.getOpcode() == ISD::UINT_TO_FP ?
- PPCISD::FCFIDU : PPCISD::FCFID);
- MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
- MVT::f32 : MVT::f64;
+ unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+ ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
+ : PPCISD::FCFIDS)
+ : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
+ : PPCISD::FCFID);
+ MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+ ? MVT::f32
+ : MVT::f64;
// If we're converting from a float, to an int, and back to a float again,
// then we don't need the store/load pair at all.
@@ -8610,7 +9825,7 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
FP = DAG.getNode(ISD::FP_ROUND, dl,
- MVT::f32, FP, DAG.getIntPtrConstant(0));
+ MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
DCI.AddToWorklist(FP.getNode());
}
@@ -8721,7 +9936,6 @@ SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
- const TargetMachine &TM = getTargetMachine();
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
switch (N->getOpcode()) {
@@ -8758,8 +9972,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return combineFPToIntToFP(N, DCI);
case ISD::STORE: {
// Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
- if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
- !cast<StoreSDNode>(N)->isTruncatingStore() &&
+ if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
N->getOperand(1).getValueType() == MVT::i32 &&
N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
@@ -8790,8 +10003,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
N->getOperand(1).getNode()->hasOneUse() &&
(N->getOperand(1).getValueType() == MVT::i32 ||
N->getOperand(1).getValueType() == MVT::i16 ||
- (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
- TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
N->getOperand(1).getValueType() == MVT::i64))) {
SDValue BSwapOp = N->getOperand(1).getOperand(0);
// Do an any-extend to 32-bits if this is a half-word input.
@@ -8812,8 +10024,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
EVT VT = N->getOperand(1).getValueType();
if (VT.isSimple()) {
MVT StoreVT = VT.getSimpleVT();
- if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
- TM.getSubtarget<PPCSubtarget>().isLittleEndian() &&
+ if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
(StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
return expandVSXStoreForLE(N, DCI);
@@ -8827,23 +10038,26 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// For little endian, VSX loads require generating lxvd2x/xxswapd.
if (VT.isSimple()) {
MVT LoadVT = VT.getSimpleVT();
- if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
- TM.getSubtarget<PPCSubtarget>().isLittleEndian() &&
+ if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
(LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
return expandVSXLoadForLE(N, DCI);
}
- Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ EVT MemVT = LD->getMemoryVT();
+ Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
- if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
- TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
- // P8 and later hardware should just use LOAD.
- !TM.getSubtarget<PPCSubtarget>().hasP8Vector() &&
- (VT == MVT::v16i8 || VT == MVT::v8i16 ||
- VT == MVT::v4i32 || VT == MVT::v4f32) &&
+ Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
+ unsigned ScalarABIAlignment = getDataLayout()->getABITypeAlignment(STy);
+ if (LD->isUnindexed() && VT.isVector() &&
+ ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
+ // P8 and later hardware should just use LOAD.
+ !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+ VT == MVT::v4i32 || VT == MVT::v4f32)) ||
+ (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
+ LD->getAlignment() >= ScalarABIAlignment)) &&
LD->getAlignment() < ABIAlignment) {
- // This is a type-legal unaligned Altivec load.
+ // This is a type-legal unaligned Altivec or QPX load.
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
bool isLittleEndian = Subtarget.isLittleEndian();
@@ -8872,10 +10086,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// a different base address offset from this one by an aligned amount.
// The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
// optimization later.
- Intrinsic::ID Intr = (isLittleEndian ?
- Intrinsic::ppc_altivec_lvsr :
- Intrinsic::ppc_altivec_lvsl);
- SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);
+ Intrinsic::ID Intr, IntrLD, IntrPerm;
+ MVT PermCntlTy, PermTy, LDTy;
+ if (Subtarget.hasAltivec()) {
+ Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
+ Intrinsic::ppc_altivec_lvsl;
+ IntrLD = Intrinsic::ppc_altivec_lvx;
+ IntrPerm = Intrinsic::ppc_altivec_vperm;
+ PermCntlTy = MVT::v16i8;
+ PermTy = MVT::v4i32;
+ LDTy = MVT::v4i32;
+ } else {
+ Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
+ Intrinsic::ppc_qpx_qvlpcls;
+ IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
+ Intrinsic::ppc_qpx_qvlfs;
+ IntrPerm = Intrinsic::ppc_qpx_qvfperm;
+ PermCntlTy = MVT::v4f64;
+ PermTy = MVT::v4f64;
+ LDTy = MemVT.getSimpleVT();
+ }
+
+ SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
// Create the new MMO for the new base load. It is like the original MMO,
// but represents an area in memory almost twice the vector size centered
@@ -8884,18 +10116,16 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// original unaligned load.
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *BaseMMO =
- MF.getMachineMemOperand(LD->getMemOperand(),
- -LD->getMemoryVT().getStoreSize()+1,
- 2*LD->getMemoryVT().getStoreSize()-1);
+ MF.getMachineMemOperand(LD->getMemOperand(), -MemVT.getStoreSize()+1,
+ 2*MemVT.getStoreSize()-1);
// Create the new base load.
- SDValue LDXIntID = DAG.getTargetConstant(Intrinsic::ppc_altivec_lvx,
- getPointerTy());
+ SDValue LDXIntID = DAG.getTargetConstant(IntrLD, dl, getPointerTy());
SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
SDValue BaseLoad =
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
- DAG.getVTList(MVT::v4i32, MVT::Other),
- BaseLoadOps, MVT::v4i32, BaseMMO);
+ DAG.getVTList(PermTy, MVT::Other),
+ BaseLoadOps, LDTy, BaseMMO);
// Note that the value of IncOffset (which is provided to the next
// load's pointer info offset value, and thus used to calculate the
@@ -8914,17 +10144,17 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (!findConsecutiveLoad(LD, DAG))
--IncValue;
- SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
+ SDValue Increment = DAG.getConstant(IncValue, dl, getPointerTy());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
MachineMemOperand *ExtraMMO =
MF.getMachineMemOperand(LD->getMemOperand(),
- 1, 2*LD->getMemoryVT().getStoreSize()-1);
+ 1, 2*MemVT.getStoreSize()-1);
SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
SDValue ExtraLoad =
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
- DAG.getVTList(MVT::v4i32, MVT::Other),
- ExtraLoadOps, MVT::v4i32, ExtraMMO);
+ DAG.getVTList(PermTy, MVT::Other),
+ ExtraLoadOps, LDTy, ExtraMMO);
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
BaseLoad.getValue(1), ExtraLoad.getValue(1));
@@ -8936,14 +10166,19 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// and ExtraLoad here.
SDValue Perm;
if (isLittleEndian)
- Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ Perm = BuildIntrinsicOp(IntrPerm,
ExtraLoad, BaseLoad, PermCntl, DAG, dl);
else
- Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ Perm = BuildIntrinsicOp(IntrPerm,
BaseLoad, ExtraLoad, PermCntl, DAG, dl);
- if (VT != MVT::v4i32)
- Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
+ if (VT != PermTy)
+ Perm = Subtarget.hasAltivec() ?
+ DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
+ DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
+ DAG.getTargetConstant(1, dl, MVT::i64));
+ // second argument is 1 because this rounding
+ // is always exact.
// The output of the permutation is our loaded result, the TokenFactor is
// our new chain.
@@ -8952,40 +10187,67 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
break;
- case ISD::INTRINSIC_WO_CHAIN: {
- bool isLittleEndian = Subtarget.isLittleEndian();
- Intrinsic::ID Intr = (isLittleEndian ?
- Intrinsic::ppc_altivec_lvsr :
- Intrinsic::ppc_altivec_lvsl);
- if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
+ case ISD::INTRINSIC_WO_CHAIN: {
+ bool isLittleEndian = Subtarget.isLittleEndian();
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
+ : Intrinsic::ppc_altivec_lvsl);
+ if ((IID == Intr ||
+ IID == Intrinsic::ppc_qpx_qvlpcld ||
+ IID == Intrinsic::ppc_qpx_qvlpcls) &&
N->getOperand(1)->getOpcode() == ISD::ADD) {
- SDValue Add = N->getOperand(1);
-
- if (DAG.MaskedValueIsZero(Add->getOperand(1),
- APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
- Add.getValueType().getScalarType().getSizeInBits()))) {
- SDNode *BasePtr = Add->getOperand(0).getNode();
- for (SDNode::use_iterator UI = BasePtr->use_begin(),
- UE = BasePtr->use_end(); UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
- cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
- Intr) {
- // We've found another LVSL/LVSR, and this address is an aligned
- // multiple of that one. The results will be the same, so use the
- // one we've just found instead.
-
- return SDValue(*UI, 0);
+ SDValue Add = N->getOperand(1);
+
+ int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
+ 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
+
+ if (DAG.MaskedValueIsZero(
+ Add->getOperand(1),
+ APInt::getAllOnesValue(Bits /* alignment */)
+ .zext(
+ Add.getValueType().getScalarType().getSizeInBits()))) {
+ SDNode *BasePtr = Add->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = BasePtr->use_begin(),
+ UE = BasePtr->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
+ // We've found another LVSL/LVSR, and this address is an aligned
+ // multiple of that one. The results will be the same, so use the
+ // one we've just found instead.
+
+ return SDValue(*UI, 0);
+ }
+ }
+ }
+
+ if (isa<ConstantSDNode>(Add->getOperand(1))) {
+ SDNode *BasePtr = Add->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = BasePtr->use_begin(),
+ UE = BasePtr->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(UI->getOperand(1)) &&
+ (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
+ cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
+ (1ULL << Bits) == 0) {
+ SDNode *OtherAdd = *UI;
+ for (SDNode::use_iterator VI = OtherAdd->use_begin(),
+ VE = OtherAdd->use_end(); VI != VE; ++VI) {
+ if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
+ return SDValue(*VI, 0);
+ }
+ }
+ }
}
}
}
}
- }
break;
case ISD::INTRINSIC_W_CHAIN: {
// For little endian, VSX loads require generating lxvd2x/xxswapd.
- if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
- TM.getSubtarget<PPCSubtarget>().isLittleEndian()) {
+ if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default:
break;
@@ -8998,8 +10260,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::INTRINSIC_VOID: {
// For little endian, VSX stores require generating xxswapd/stxvd2x.
- if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
- TM.getSubtarget<PPCSubtarget>().isLittleEndian()) {
+ if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default:
break;
@@ -9015,8 +10276,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
(N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
- (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
- TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
N->getValueType(0) == MVT::i64))) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
@@ -9165,7 +10425,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
- getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
+ getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
assert(isDot && "Can't compare against a vector result!");
// If this is a comparison against something other than 0/1, then we know
@@ -9185,7 +10445,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Ops[] = {
LHS.getOperand(2), // LHS of compare
LHS.getOperand(3), // RHS of compare
- DAG.getConstant(CompareOpc, MVT::i32)
+ DAG.getConstant(CompareOpc, dl, MVT::i32)
};
EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
@@ -9209,7 +10469,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
- DAG.getConstant(CompOpc, MVT::i32),
+ DAG.getConstant(CompOpc, dl, MVT::i32),
DAG.getRegister(PPC::CR6, MVT::i32),
N->getOperand(4), CompNode.getValue(1));
}
@@ -9237,14 +10497,14 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
bool IsNegPow2 = (-Divisor).isPowerOf2();
unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
- SDValue ShiftAmt = DAG.getConstant(Lg2, VT);
+ SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
if (Created)
Created->push_back(Op.getNode());
if (IsNegPow2) {
- Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), Op);
+ Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
if (Created)
Created->push_back(Op.getNode());
}
@@ -9278,14 +10538,17 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case Intrinsic::ppc_altivec_vcmpequb_p:
case Intrinsic::ppc_altivec_vcmpequh_p:
case Intrinsic::ppc_altivec_vcmpequw_p:
+ case Intrinsic::ppc_altivec_vcmpequd_p:
case Intrinsic::ppc_altivec_vcmpgefp_p:
case Intrinsic::ppc_altivec_vcmpgtfp_p:
case Intrinsic::ppc_altivec_vcmpgtsb_p:
case Intrinsic::ppc_altivec_vcmpgtsh_p:
case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ case Intrinsic::ppc_altivec_vcmpgtsd_p:
case Intrinsic::ppc_altivec_vcmpgtub_p:
case Intrinsic::ppc_altivec_vcmpgtuh_p:
case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ case Intrinsic::ppc_altivec_vcmpgtud_p:
KnownZero = ~1U; // All bits but the low one are known to be zero.
break;
}
@@ -9307,9 +10570,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
if (!ML)
break;
- const PPCInstrInfo *TII =
- static_cast<const PPCInstrInfo *>(getTargetMachine().getSubtargetImpl()->
- getInstrInfo());
+ const PPCInstrInfo *TII = Subtarget.getInstrInfo();
// For small loops (between 5 and 8 instructions), align to a 32-byte
// boundary so that the entire loop fits in one instruction-cache line.
@@ -9414,8 +10675,9 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
return weight;
}
-std::pair<unsigned, const TargetRegisterClass*>
-PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+std::pair<unsigned, const TargetRegisterClass *>
+PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
// GCC RS6000 Constraint Letters
@@ -9433,8 +10695,16 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return std::make_pair(0U, &PPC::F4RCRegClass);
if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &PPC::F8RCRegClass);
+ if (VT == MVT::v4f64 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QFRCRegClass);
+ if (VT == MVT::v4f32 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QSRCRegClass);
break;
case 'v':
+ if (VT == MVT::v4f64 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QFRCRegClass);
+ if (VT == MVT::v4f32 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QSRCRegClass);
return std::make_pair(0U, &PPC::VRRCRegClass);
case 'y': // crrc
return std::make_pair(0U, &PPC::CRRCRegClass);
@@ -9445,11 +10715,14 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
Constraint == "wf") {
return std::make_pair(0U, &PPC::VSRCRegClass);
} else if (Constraint == "ws") {
- return std::make_pair(0U, &PPC::VSFRCRegClass);
+ if (VT == MVT::f32)
+ return std::make_pair(0U, &PPC::VSSRCRegClass);
+ else
+ return std::make_pair(0U, &PPC::VSFRCRegClass);
}
- std::pair<unsigned, const TargetRegisterClass*> R =
- TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ std::pair<unsigned, const TargetRegisterClass *> R =
+ TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
// (which we call X[0-9]+). If a 64-bit value has been requested, and a
@@ -9458,13 +10731,10 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
// the AsmName field from *RegisterInfo.td, then this would not be necessary.
if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
- PPC::GPRCRegClass.contains(R.first)) {
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
+ PPC::GPRCRegClass.contains(R.first))
return std::make_pair(TRI->getMatchingSuperReg(R.first,
PPC::sub_32, &PPC::G8RCRegClass),
&PPC::G8RCRegClass);
- }
// GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
@@ -9500,6 +10770,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
case 'P': {
ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
if (!CST) return; // Must be an immediate to match.
+ SDLoc dl(Op);
int64_t Value = CST->getSExtValue();
EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
// numbers are printed as such.
@@ -9507,35 +10778,35 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
default: llvm_unreachable("Unknown constraint letter!");
case 'I': // "I" is a signed 16-bit constant.
if (isInt<16>(Value))
- Result = DAG.getTargetConstant(Value, TCVT);
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
if (isShiftedUInt<16, 16>(Value))
- Result = DAG.getTargetConstant(Value, TCVT);
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
if (isShiftedInt<16, 16>(Value))
- Result = DAG.getTargetConstant(Value, TCVT);
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
if (isUInt<16>(Value))
- Result = DAG.getTargetConstant(Value, TCVT);
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'M': // "M" is a constant that is greater than 31.
if (Value > 31)
- Result = DAG.getTargetConstant(Value, TCVT);
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'N': // "N" is a positive constant that is an exact power of two.
if (Value > 0 && isPowerOf2_64(Value))
- Result = DAG.getTargetConstant(Value, TCVT);
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'O': // "O" is the constant zero.
if (Value == 0)
- Result = DAG.getTargetConstant(Value, TCVT);
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
if (isInt<16>(-Value))
- Result = DAG.getTargetConstant(Value, TCVT);
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
}
break;
@@ -9555,7 +10826,9 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
Type *Ty) const {
- // FIXME: PPC does not allow r+i addressing modes for vectors!
+ // PPC does not allow r+i addressing modes for vectors!
+ if (Ty->isVectorTy() && AM.BaseOffs != 0)
+ return false;
// PPC allows a sign-extended 16-bit immediate field.
if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
@@ -9604,14 +10877,12 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setLRStoreRequired();
bool isPPC64 = Subtarget.isPPC64();
- bool isDarwinABI = Subtarget.isDarwinABI();
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
-
- DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
- isPPC64? MVT::i64 : MVT::i32);
+ DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
+ isPPC64 ? MVT::i64 : MVT::i32);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
@@ -9639,8 +10910,7 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
// Naked functions never have a frame pointer, and so we use r1. For all
// other functions, this decision must be delayed until during PEI.
unsigned FrameReg;
- if (MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::Naked))
+ if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
else
FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
@@ -9668,7 +10938,7 @@ unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
bool is64Bit = isPPC64 && VT == MVT::i64;
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("r1", is64Bit ? PPC::X1 : PPC::R1)
- .Case("r2", isDarwinABI ? 0 : (is64Bit ? PPC::X2 : PPC::R2))
+ .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
.Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
(is64Bit ? PPC::X13 : PPC::R13))
.Default(0);
@@ -9689,6 +10959,12 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
unsigned Intrinsic) const {
switch (Intrinsic) {
+ case Intrinsic::ppc_qpx_qvlfd:
+ case Intrinsic::ppc_qpx_qvlfs:
+ case Intrinsic::ppc_qpx_qvlfcd:
+ case Intrinsic::ppc_qpx_qvlfcs:
+ case Intrinsic::ppc_qpx_qvlfiwa:
+ case Intrinsic::ppc_qpx_qvlfiwz:
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_altivec_lvebx:
@@ -9710,6 +10986,18 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::ppc_vsx_lxvd2x:
VT = MVT::v2f64;
break;
+ case Intrinsic::ppc_qpx_qvlfd:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfs:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcd:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcs:
+ VT = MVT::v2f32;
+ break;
default:
VT = MVT::v4i32;
break;
@@ -9726,6 +11014,47 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.writeMem = false;
return true;
}
+ case Intrinsic::ppc_qpx_qvlfda:
+ case Intrinsic::ppc_qpx_qvlfsa:
+ case Intrinsic::ppc_qpx_qvlfcda:
+ case Intrinsic::ppc_qpx_qvlfcsa:
+ case Intrinsic::ppc_qpx_qvlfiwaa:
+ case Intrinsic::ppc_qpx_qvlfiwza: {
+ EVT VT;
+ switch (Intrinsic) {
+ case Intrinsic::ppc_qpx_qvlfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcsa:
+ VT = MVT::v2f32;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.size = VT.getStoreSize();
+ Info.align = 1;
+ Info.vol = false;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::ppc_qpx_qvstfd:
+ case Intrinsic::ppc_qpx_qvstfs:
+ case Intrinsic::ppc_qpx_qvstfcd:
+ case Intrinsic::ppc_qpx_qvstfcs:
+ case Intrinsic::ppc_qpx_qvstfiw:
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
case Intrinsic::ppc_altivec_stvebx:
@@ -9747,6 +11076,18 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::ppc_vsx_stxvd2x:
VT = MVT::v2f64;
break;
+ case Intrinsic::ppc_qpx_qvstfd:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfs:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcd:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcs:
+ VT = MVT::v2f32;
+ break;
default:
VT = MVT::v4i32;
break;
@@ -9763,6 +11104,41 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.writeMem = true;
return true;
}
+ case Intrinsic::ppc_qpx_qvstfda:
+ case Intrinsic::ppc_qpx_qvstfsa:
+ case Intrinsic::ppc_qpx_qvstfcda:
+ case Intrinsic::ppc_qpx_qvstfcsa:
+ case Intrinsic::ppc_qpx_qvstfiwa: {
+ EVT VT;
+ switch (Intrinsic) {
+ case Intrinsic::ppc_qpx_qvstfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcsa:
+ VT = MVT::v2f32;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(1);
+ Info.offset = 0;
+ Info.size = VT.getStoreSize();
+ Info.align = 1;
+ Info.vol = false;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
default:
break;
}
@@ -9786,11 +11162,29 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
+ if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
+ const Function *F = MF.getFunction();
+ // When expanding a memset, require at least two QPX instructions to cover
+ // the cost of loading the value to be stored from the constant pool.
+ if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
+ (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+ return MVT::v4f64;
+ }
+
+ // We should use Altivec/VSX loads and stores when available. For unaligned
+ // addresses, unaligned VSX loads are only fast starting with the P8.
+ if (Subtarget.hasAltivec() && Size >= 16 &&
+ (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
+ ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+ return MVT::v4i32;
+ }
+
if (Subtarget.isPPC64()) {
return MVT::i64;
- } else {
- return MVT::i32;
}
+
+ return MVT::i32;
}
/// \brief Returns true if it is beneficial to convert a load of a constant
@@ -9913,7 +11307,7 @@ PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
// as implicit-defs for stackmaps and patchpoints. The same reasoning applies
// to CTR, which is used by any indirect call.
static const MCPhysReg ScratchRegs[] = {
- PPC::X11, PPC::X12, PPC::LR8, PPC::CTR8, 0
+ PPC::X12, PPC::LR8, PPC::CTR8, 0
};
return ScratchRegs;
@@ -9925,6 +11319,11 @@ PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
if (VT == MVT::v2i64)
return false;
+ if (Subtarget.hasQPX()) {
+ if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)
+ return true;
+ }
+
return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 1b585d8e5090..c93de430fd05 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -24,7 +24,7 @@
namespace llvm {
namespace PPCISD {
- enum NodeType {
+ enum NodeType : unsigned {
// Start the numbering where the builtin ops and target ops leave off.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
@@ -71,18 +71,9 @@ namespace llvm {
/// though these are usually folded into other nodes.
Hi, Lo,
- TOC_ENTRY,
-
/// The following two target-specific nodes are used for calls through
/// function pointers in the 64-bit SVR4 ABI.
- /// Like a regular LOAD but additionally taking/producing a flag.
- LOAD,
-
- /// Like LOAD (taking/producing a flag), but using r2 as hard-coded
- /// destination.
- LOAD_TOC,
-
/// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
/// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
/// compute an allocation on the stack.
@@ -108,10 +99,6 @@ namespace llvm {
/// SVR4 calls.
CALL, CALL_NOP,
- /// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used
- /// to access TLS variables.
- CALL_TLS, CALL_NOP_TLS,
-
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
MTCTR,
@@ -132,6 +119,15 @@ namespace llvm {
/// resultant GPR. Bits corresponding to other CR regs are undefined.
MFOCRF,
+ /// Direct move from a VSX register to a GPR
+ MFVSR,
+
+ /// Direct move from a GPR to a VSX register (algebraic)
+ MTVSRA,
+
+ /// Direct move from a GPR to a VSX register (zero)
+ MTVSRZ,
+
// FIXME: Remove these once the ANDI glue bug is fixed:
/// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
/// eq or gt bit of CR0 after executing andi. x, 1. This is used to
@@ -179,14 +175,6 @@ namespace llvm {
/// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
MFFS,
- /// LARX = This corresponds to PPC l{w|d}arx instrcution: load and
- /// reserve indexed. This is used to implement atomic operations.
- LARX,
-
- /// STCX = This corresponds to PPC stcx. instrcution: store conditional
- /// indexed. This is used to implement atomic operations.
- STCX,
-
/// TC_RETURN - A tail call return.
/// operand #0 chain
/// operand #1 callee (register or absolute)
@@ -203,7 +191,7 @@ namespace llvm {
PPC32_GOT,
/// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
- /// local dynamic TLS on PPC32.
+ /// local dynamic TLS on PPC32.
PPC32_PICGOT,
/// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
@@ -230,26 +218,46 @@ namespace llvm {
/// register to sym\@got\@tlsgd\@ha.
ADDIS_TLSGD_HA,
- /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
+ /// %X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
/// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym\@got\@tlsgd\@l.
+ /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by
+ /// ADDIS_TLSGD_L_ADDR until after register assignment.
ADDI_TLSGD_L,
+ /// %X3 = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by
+ /// ADDIS_TLSGD_L_ADDR until after register assignment.
+ GET_TLS_ADDR,
+
+ /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
+ /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
+ /// register assignment.
+ ADDI_TLSGD_L_ADDR,
+
/// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
/// model, produces an ADDIS8 instruction that adds the GOT base
/// register to sym\@got\@tlsld\@ha.
ADDIS_TLSLD_HA,
- /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
+ /// %X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
/// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym\@got\@tlsld\@l.
+ /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by
+ /// ADDIS_TLSLD_L_ADDR until after register assignment.
ADDI_TLSLD_L,
- /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
- /// local-dynamic TLS model, produces an ADDIS8 instruction
- /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed
- /// to tie this in place following a copy to %X3 from the result
- /// of a GET_TLSLD_ADDR.
+ /// %X3 = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by
+ /// ADDIS_TLSLD_L_ADDR until after register assignment.
+ GET_TLSLD_ADDR,
+
+ /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
+ /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
+ /// following register assignment.
+ ADDI_TLSLD_L_ADDR,
+
+ /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds X3 to
+ /// sym\@dtprel\@ha.
ADDIS_DTPREL_HA,
/// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
@@ -267,6 +275,16 @@ namespace llvm {
/// operand identifies the operating system entry point.
SC,
+ /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
+ CLRBHRB,
+
+ /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch
+ /// history rolling buffer entry.
+ MFBHRBE,
+
+ /// CHAIN = RFEBB CHAIN, State - Return from event-based branch.
+ RFEBB,
+
/// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
/// endian. Maps to an xxswapd instruction that corrects an lxvd2x
/// or stxvd2x instruction. The chain is necessary because the
@@ -274,6 +292,22 @@ namespace llvm {
/// of outputs.
XXSWAPD,
+ /// QVFPERM = This corresponds to the QPX qvfperm instruction.
+ QVFPERM,
+
+ /// QVGPCI = This corresponds to the QPX qvgpci instruction.
+ QVGPCI,
+
+ /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
+ QVALIGNI,
+
+ /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
+ QVESPLATI,
+
+ /// QBFLT = Access the underlying QPX floating-point boolean
+ /// representation.
+ QBFLT,
+
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
@@ -300,21 +334,6 @@ namespace llvm {
/// destination 64-bit register.
LFIWZX,
- /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
- /// produces an ADDIS8 instruction that adds the TOC base register to
- /// sym\@toc\@ha.
- ADDIS_TOC_HA,
-
- /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model,
- /// produces a LD instruction with base register G8RReg and offset
- /// sym\@toc\@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
- LD_TOC_L,
-
- /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces
- /// an ADDI8 instruction that adds G8RReg to sym\@toc\@l.
- /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
- ADDI_TOC_L,
-
/// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
/// Maps directly to an lxvd2x instruction that will be followed by
/// an xxswapd.
@@ -323,7 +342,16 @@ namespace llvm {
/// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
/// Maps directly to an stxvd2x instruction that will be preceded by
/// an xxswapd.
- STXVD2X
+ STXVD2X,
+
+ /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
+ /// The 4xf32 load used for v4i1 constants.
+ QVLFSb,
+
+ /// GPRC = TOC_ENTRY GA, TOC
+ /// Loads the entry for GA from the TOC, where the TOC base is given by
+ /// the last operand.
+ TOC_ENTRY
};
}
@@ -339,6 +367,11 @@ namespace llvm {
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG);
+ /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUDUM instruction.
+ bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG);
+
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
@@ -359,10 +392,6 @@ namespace llvm {
/// VSPLTB/VSPLTH/VSPLTW.
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
- /// isAllNegativeZeroVector - Returns true if all elements of build_vector
- /// are -0.0.
- bool isAllNegativeZeroVector(SDNode *N);
-
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);
@@ -372,14 +401,18 @@ namespace llvm {
/// size, return the constant being splatted. The ByteSize field indicates
/// the number of bytes of each element [124] -> [bhw].
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+
+ /// If this is a qvaligni shuffle mask, return the shift
+ /// amount, otherwise return -1.
+ int isQVALIGNIShuffleMask(SDNode *N);
}
- class PPCSubtarget;
class PPCTargetLowering : public TargetLowering {
const PPCSubtarget &Subtarget;
public:
- explicit PPCTargetLowering(const PPCTargetMachine &TM);
+ explicit PPCTargetLowering(const PPCTargetMachine &TM,
+ const PPCSubtarget &STI);
/// getTargetNodeName() - This method returns the name of a target specific
/// DAG node.
@@ -468,7 +501,8 @@ namespace llvm {
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
- MachineBasicBlock *MBB, bool is64Bit,
+ MachineBasicBlock *MBB,
+ unsigned AtomicSize,
unsigned BinOpcode) const;
MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *MBB,
@@ -488,9 +522,10 @@ namespace llvm {
ConstraintWeight getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const override;
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const override;
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
+ MVT VT) const override;
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
@@ -504,6 +539,21 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "es")
+ return InlineAsm::Constraint_es;
+ else if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
+ else if (ConstraintCode == "Q")
+ return InlineAsm::Constraint_Q;
+ else if (ConstraintCode == "Z")
+ return InlineAsm::Constraint_Z;
+ else if (ConstraintCode == "Zy")
+ return InlineAsm::Constraint_Zy;
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
@@ -619,6 +669,10 @@ namespace llvm {
void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
SelectionDAG &DAG, SDLoc dl) const;
+ SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,
+ SDLoc dl) const;
+ SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
+ SDLoc dl) const;
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
@@ -642,8 +696,6 @@ namespace llvm {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
- std::pair<SDValue,SDValue> lowerTLSCall(SDValue Op, SDLoc dl,
- SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
@@ -672,11 +724,15 @@ namespace llvm {
SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
+
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -687,11 +743,12 @@ namespace llvm {
SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8>
&RegsToPass,
- SDValue InFlag, SDValue Chain,
+ SDValue InFlag, SDValue Chain, SDValue CallSeqStart,
SDValue &Callee,
int SPDiff, unsigned NumBytes,
const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const;
SDValue
LowerFormalArguments(SDValue Chain,
@@ -753,7 +810,8 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const;
SDValue
LowerCall_64SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv,
@@ -762,7 +820,8 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const;
SDValue
LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
bool isVarArg, bool isTailCall, bool IsPatchPoint,
@@ -770,7 +829,8 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const;
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 5c6675dd36b3..d62833037db5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -56,22 +56,23 @@ def tlscall : Operand<i64> {
def SHL64 : SDNodeXForm<imm, [{
// Transformation function: 63 - imm
- return getI32Imm(63 - N->getZExtValue());
+ return getI32Imm(63 - N->getZExtValue(), SDLoc(N));
}]>;
def SRL64 : SDNodeXForm<imm, [{
// Transformation function: 64 - imm
- return N->getZExtValue() ? getI32Imm(64 - N->getZExtValue()) : getI32Imm(0);
+ return N->getZExtValue() ? getI32Imm(64 - N->getZExtValue(), SDLoc(N))
+ : getI32Imm(0, SDLoc(N));
}]>;
def HI32_48 : SDNodeXForm<imm, [{
// Transformation function: shift the immediate value down into the low bits.
- return getI32Imm((unsigned short)(N->getZExtValue() >> 32));
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 32, SDLoc(N)));
}]>;
def HI48_64 : SDNodeXForm<imm, [{
// Transformation function: shift the immediate value down into the low bits.
- return getI32Imm((unsigned short)(N->getZExtValue() >> 48));
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 48, SDLoc(N)));
}]>;
@@ -202,9 +203,6 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
(BL8_NOP texternalsym:$dst)>;
-def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym),
- (BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
-
// Atomic operations
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
@@ -238,15 +236,19 @@ let usesCustomInserter = 1 in {
}
// Instructions to support atomic operations
+let mayLoad = 1, hasSideEffects = 0 in {
def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
- "ldarx $rD, $ptr", IIC_LdStLDARX,
- [(set i64:$rD, (PPClarx xoaddr:$ptr))]>;
+ "ldarx $rD, $ptr", IIC_LdStLDARX, []>;
-let Defs = [CR0] in
+// Instruction to support lock versions of atomics
+// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
+def LDARXL : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
+ "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isDOT;
+}
+
+let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in
def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
- "stdcx. $rS, $dst", IIC_LdStSTDCX,
- [(PPCstcx i64:$rS, xoaddr:$dst)]>,
- isDOT;
+ "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isDOT;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
@@ -328,6 +330,12 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
Requires<[In64BitMode]>;
}
+def MFSPR8 : XFXForm_1<31, 339, (outs g8rc:$RT), (ins i32imm:$SPR),
+ "mfspr $RT, $SPR", IIC_SprMFSPR>;
+def MTSPR8 : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, g8rc:$RT),
+ "mtspr $SPR, $RT", IIC_SprMTSPR>;
+
+
//===----------------------------------------------------------------------===//
// 64-bit SPR manipulation instrs.
@@ -596,6 +604,10 @@ defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS),
def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
"popcntd $rA, $rS", IIC_IntGeneral,
[(set i64:$rA, (ctpop i64:$rS))]>;
+def BPERMD : XForm_6<31, 252, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "bpermd $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA, (int_ppc_bpermd g8rc:$rS, g8rc:$rB))]>,
+ isPPC64, Requires<[HasBPERMD]>;
let isCodeGenOnly = 1, isCommutable = 1 in
def CMPB8 : XForm_6<31, 508, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
@@ -609,14 +621,30 @@ def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS),
"popcntw $rA, $rS", IIC_IntGeneral,
[(set i32:$rA, (ctpop i32:$rS))]>;
-defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divd", "$rT, $rA, $rB", IIC_IntDivD,
- [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
-defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divdu", "$rT, $rA, $rB", IIC_IntDivD,
- [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divd", "$rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64;
+defm DIVDU : XOForm_1rcr<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdu", "$rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64;
+def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divde $rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>,
+ isPPC64, Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divde. $rT, $rA, $rB", IIC_IntDivD,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ isPPC64, Requires<[HasExtDiv]>;
+def DIVDEU : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdeu $rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>,
+ isPPC64, Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVDEUo : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdeu. $rT, $rA, $rB", IIC_IntDivD,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ isPPC64, Requires<[HasExtDiv]>;
let isCommutable = 1 in
defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"mulld", "$rT, $rA, $rB", IIC_IntMulHD,
@@ -686,7 +714,7 @@ defm RLWIMI8 : MForm_2r<20, (outs g8rc:$rA),
let isSelect = 1 in
def ISEL8 : AForm_4<31, 15,
(outs g8rc:$rT), (ins g8rc_nox0:$rA, g8rc:$rB, crbitrc:$cond),
- "isel $rT, $rA, $rB, $cond", IIC_IntGeneral,
+ "isel $rT, $rA, $rB, $cond", IIC_IntISEL,
[]>;
} // Interpretation64Bit
} // hasSideEffects = 0
@@ -699,7 +727,7 @@ def ISEL8 : AForm_4<31, 15,
// Sign extending loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
"lha $rD, $src", IIC_LdStLHA,
@@ -755,7 +783,7 @@ def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
// Zero extending loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src),
"lbz $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi8 iaddr:$src))]>;
@@ -813,7 +841,7 @@ def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
// Full 8-byte loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
"ld $rD, $src", IIC_LdStLD,
[(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
@@ -837,11 +865,6 @@ def LDtocBA: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
[(set i64:$rD,
(PPCtoc_entry tblockaddress:$disp, i64:$reg))]>, isPPC64;
-let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2, Defs = [X2] in
-def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src),
- "ld 2, $src", IIC_LdStLD,
- [(PPCload_toc ixaddr:$src)]>, isPPC64;
-
def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src),
"ldx $rD, $src", IIC_LdStLD,
[(set i64:$rD, (load xaddr:$src))]>, isPPC64;
@@ -870,25 +893,16 @@ def LDUX : XForm_1<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
}
}
-def : Pat<(PPCload ixaddr:$src),
- (LD ixaddr:$src)>;
-def : Pat<(PPCload xaddr:$src),
- (LDX xaddr:$src)>;
-
// Support for medium and large code model.
+let hasSideEffects = 0 in {
def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
- "#ADDIStocHA",
- [(set i64:$rD,
- (PPCaddisTocHA i64:$reg, tglobaladdr:$disp))]>,
- isPPC64;
+ "#ADDIStocHA", []>, isPPC64;
+let mayLoad = 1 in
def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
- "#LDtocL",
- [(set i64:$rD,
- (PPCldTocL tglobaladdr:$disp, i64:$reg))]>, isPPC64;
+ "#LDtocL", []>, isPPC64;
def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
- "#ADDItocL",
- [(set i64:$rD,
- (PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64;
+ "#ADDItocL", []>, isPPC64;
+}
// Support for thread-local storage.
def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
@@ -914,6 +928,28 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
[(set i64:$rD,
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
+// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
+// explicitly defined when this op is created, so not mentioned here.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+ "#GETtlsADDR",
+ [(set i64:$rD,
+ (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+// Combined op for ADDItlsgdL and GETtlsADDR, late expanded. X3 and LR8
+// are true defines while the rest of the Defs are clobbers.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
+ in
+def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD),
+ (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
+ "#ADDItlsgdLADDR",
+ [(set i64:$rD,
+ (PPCaddiTlsgdLAddr i64:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>,
+ isPPC64;
def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsldHA",
[(set i64:$rD,
@@ -924,6 +960,28 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
[(set i64:$rD,
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
+// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
+// explicitly defined when this op is created, so not mentioned here.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+ "#GETtlsldADDR",
+ [(set i64:$rD,
+ (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+// Combined op for ADDItlsldL and GETtlsADDR, late expanded. X3 and LR8
+// are true defines, while the rest of the Defs are clobbers.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
+ in
+def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD),
+ (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
+ "#ADDItlsldLADDR",
+ [(set i64:$rD,
+ (PPCaddiTlsldLAddr i64:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>,
+ isPPC64;
def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISdtprelHA",
[(set i64:$rD,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 4ef08eb1130d..e27bf7f5c0e0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -11,6 +11,21 @@
//
//===----------------------------------------------------------------------===//
+// *********************************** NOTE ***********************************
+// ** For POWER8 Little Endian, the VSX swap optimization relies on knowing **
+// ** which VMX and VSX instructions are lane-sensitive and which are not. **
+// ** A lane-sensitive instruction relies, implicitly or explicitly, on **
+// ** whether lanes are numbered from left to right. An instruction like **
+// ** VADDFP is not lane-sensitive, because each lane of the result vector **
+// ** relies only on the corresponding lane of the source vectors. However, **
+// ** an instruction like VMULESB is lane-sensitive, because "even" and **
+// ** "odd" lanes are different for big-endian and little-endian numbering. **
+// ** **
+// ** When adding new VMX and VSX instructions, please consider whether they **
+// ** are lane-sensitive. If so, they must be added to a switch statement **
+// ** in PPCVSXSwapRemoval::gatherVectorInstructions(). **
+// ****************************************************************************
+
//===----------------------------------------------------------------------===//
// Altivec transformation functions and pattern fragments.
//
@@ -28,6 +43,10 @@ def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
}]>;
+def vpkudum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUDUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
+}]>;
def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
@@ -36,6 +55,10 @@ def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
}]>;
+def vpkudum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUDUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
+}]>;
// These fragments are provided for little-endian, where the inputs must be
// swapped for correct semantics.
@@ -47,6 +70,10 @@ def vpkuwum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG);
}]>;
+def vpkudum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUDUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG);
+}]>;
def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
@@ -129,7 +156,7 @@ def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG), SDLoc(N));
}]>;
def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
@@ -140,7 +167,7 @@ def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
/// vector_shuffle(X,undef,mask) by the dag combiner.
def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, 1, *CurDAG));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 1, *CurDAG), SDLoc(N));
}]>;
def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
@@ -151,7 +178,7 @@ def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
/// VSLDOI_swapped* - These fragments are provided for little-endian, where
/// the inputs must be swapped for correct semantics.
def VSLDOI_swapped_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, 2, *CurDAG));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 2, *CurDAG), SDLoc(N));
}]>;
def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
@@ -161,21 +188,21 @@ def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG), SDLoc(N));
}]>;
def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
}], VSPLTB_get_imm>;
def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 2, *CurDAG));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 2, *CurDAG), SDLoc(N));
}]>;
def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
}], VSPLTH_get_imm>;
def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 4, *CurDAG));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 4, *CurDAG), SDLoc(N));
}]>;
def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
@@ -269,6 +296,16 @@ class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
!strconcat(opc, " $vD, $vB"), IIC_VecFP,
[(set OutTy:$vD, (IntID InTy:$vB))]>;
+class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_BX<xo, (outs vrrc:$vD), (ins vrrc:$vA),
+ !strconcat(opc, " $vD, $vA"), IIC_VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA))]>;
+
+class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX),
+ !strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>;
+
//===----------------------------------------------------------------------===//
// Instruction Definitions.
@@ -342,7 +379,7 @@ def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
"mtvscr $vB", IIC_LdStLoad,
[(int_ppc_altivec_mtvscr v4i32:$vB)]>;
-let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
+let PPC970_Unit = 2 in { // Loads.
def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src),
"lvebx $vD, $src", IIC_LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
@@ -750,7 +787,7 @@ def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
-
+
let isCodeGenOnly = 1 in {
def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
"vxor $vD, $vD, $vD", IIC_VecFP,
@@ -791,18 +828,38 @@ def : Pat<(store v4i32:$rS, xoaddr:$dst),
def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v1i128 VRRC:$src))), (v16i8 VRRC:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v1i128 VRRC:$src))), (v8i16 VRRC:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v1i128 VRRC:$src))), (v4i32 VRRC:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v1i128 VRRC:$src))), (v4f32 VRRC:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v16i8 VRRC:$src))), (v2i64 VRRC:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 VRRC:$src))), (v2i64 VRRC:$src)>;
+def : Pat<(v2i64 (bitconvert (v4i32 VRRC:$src))), (v2i64 VRRC:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 VRRC:$src))), (v2i64 VRRC:$src)>;
+def : Pat<(v2i64 (bitconvert (v1i128 VRRC:$src))), (v2i64 VRRC:$src)>;
+
+def : Pat<(v1i128 (bitconvert (v16i8 VRRC:$src))), (v1i128 VRRC:$src)>;
+def : Pat<(v1i128 (bitconvert (v8i16 VRRC:$src))), (v1i128 VRRC:$src)>;
+def : Pat<(v1i128 (bitconvert (v4i32 VRRC:$src))), (v1i128 VRRC:$src)>;
+def : Pat<(v1i128 (bitconvert (v4f32 VRRC:$src))), (v1i128 VRRC:$src)>;
+def : Pat<(v1i128 (bitconvert (v2i64 VRRC:$src))), (v1i128 VRRC:$src)>;
// Shuffles.
@@ -929,3 +986,178 @@ def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
} // end HasAltivec
+def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
+def HasP8Crypto : Predicate<"PPCSubTarget->hasP8Crypto()">;
+let Predicates = [HasP8Altivec] in {
+
+let isCommutable = 1 in {
+def VMULESW : VX1_Int_Ty2<904, "vmulesw", int_ppc_altivec_vmulesw,
+ v2i64, v4i32>;
+def VMULEUW : VX1_Int_Ty2<648, "vmuleuw", int_ppc_altivec_vmuleuw,
+ v2i64, v4i32>;
+def VMULOSW : VX1_Int_Ty2<392, "vmulosw", int_ppc_altivec_vmulosw,
+ v2i64, v4i32>;
+def VMULOUW : VX1_Int_Ty2<136, "vmulouw", int_ppc_altivec_vmulouw,
+ v2i64, v4i32>;
+def VMULUWM : VXForm_1<137, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmuluwm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (mul v4i32:$vA, v4i32:$vB))]>;
+def VMAXSD : VX1_Int_Ty<450, "vmaxsd", int_ppc_altivec_vmaxsd, v2i64>;
+def VMAXUD : VX1_Int_Ty<194, "vmaxud", int_ppc_altivec_vmaxud, v2i64>;
+def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>;
+def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
+} // isCommutable
+
+// Vector shifts
+def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
+def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsld $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (shl v2i64:$vA, v2i64:$vB))]>;
+def VSRD : VXForm_1<1732, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsrd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (srl v2i64:$vA, v2i64:$vB))]>;
+def VSRAD : VXForm_1<964, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsrad $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (sra v2i64:$vA, v2i64:$vB))]>;
+
+// Vector Integer Arithmetic Instructions
+let isCommutable = 1 in {
+def VADDUDM : VXForm_1<192, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vaddudm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (add v2i64:$vA, v2i64:$vB))]>;
+def VADDUQM : VXForm_1<256, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vadduqm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (add v1i128:$vA, v1i128:$vB))]>;
+} // isCommutable
+
+// Vector Quadword Add
+def VADDEUQM : VA1a_Int_Ty<60, "vaddeuqm", int_ppc_altivec_vaddeuqm, v1i128>;
+def VADDCUQ : VX1_Int_Ty<320, "vaddcuq", int_ppc_altivec_vaddcuq, v1i128>;
+def VADDECUQ : VA1a_Int_Ty<61, "vaddecuq", int_ppc_altivec_vaddecuq, v1i128>;
+
+// Vector Doubleword Subtract
+def VSUBUDM : VXForm_1<1216, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsubudm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (sub v2i64:$vA, v2i64:$vB))]>;
+
+// Vector Quadword Subtract
+def VSUBUQM : VXForm_1<1280, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsubuqm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (sub v1i128:$vA, v1i128:$vB))]>;
+def VSUBEUQM : VA1a_Int_Ty<62, "vsubeuqm", int_ppc_altivec_vsubeuqm, v1i128>;
+def VSUBCUQ : VX1_Int_Ty<1344, "vsubcuq", int_ppc_altivec_vsubcuq, v1i128>;
+def VSUBECUQ : VA1a_Int_Ty<63, "vsubecuq", int_ppc_altivec_vsubecuq, v1i128>;
+
+// Count Leading Zeros
+def VCLZB : VXForm_2<1794, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vclzb $vD, $vB", IIC_VecGeneral,
+ [(set v16i8:$vD, (ctlz v16i8:$vB))]>;
+def VCLZH : VXForm_2<1858, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vclzh $vD, $vB", IIC_VecGeneral,
+ [(set v8i16:$vD, (ctlz v8i16:$vB))]>;
+def VCLZW : VXForm_2<1922, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vclzw $vD, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (ctlz v4i32:$vB))]>;
+def VCLZD : VXForm_2<1986, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vclzd $vD, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (ctlz v2i64:$vB))]>;
+
+// Population Count
+def VPOPCNTB : VXForm_2<1795, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vpopcntb $vD, $vB", IIC_VecGeneral,
+ [(set v16i8:$vD, (ctpop v16i8:$vB))]>;
+def VPOPCNTH : VXForm_2<1859, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vpopcnth $vD, $vB", IIC_VecGeneral,
+ [(set v8i16:$vD, (ctpop v8i16:$vB))]>;
+def VPOPCNTW : VXForm_2<1923, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vpopcntw $vD, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (ctpop v4i32:$vB))]>;
+def VPOPCNTD : VXForm_2<1987, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vpopcntd $vD, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (ctpop v2i64:$vB))]>;
+
+let isCommutable = 1 in {
+// FIXME: Use AddedComplexity > 400 to ensure these patterns match before the
+// VSX equivalents. We need to fix this up at some point. Two possible
+// solutions for this problem:
+// 1. Disable Altivec patterns that compete with VSX patterns using the
+// !HasVSX predicate. This essentially favours VSX over Altivec, in
+// hopes of reducing register pressure (larger register set using VSX
+// instructions than VMX instructions)
+// 2. Employ a more disciplined use of AddedComplexity, which would provide
+// more fine-grained control than option 1. This would be beneficial
+// if we find situations where Altivec is really preferred over VSX.
+def VEQV : VXForm_1<1668, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "veqv $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (vnot_ppc (xor v4i32:$vA, v4i32:$vB)))]>;
+def VNAND : VXForm_1<1412, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vnand $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (vnot_ppc (and v4i32:$vA, v4i32:$vB)))]>;
+} // isCommutable
+
+def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vorc $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (or v4i32:$vA,
+ (vnot_ppc v4i32:$vB)))]>;
+
+// i64 element comparisons.
+def VCMPEQUD : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>;
+def VCMPEQUDo : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
+def VCMPGTSD : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>;
+def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
+def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>;
+def VCMPGTUDo : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
+
+// The cryptography instructions that do not require Category:Vector.Crypto
+def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb",
+ int_ppc_altivec_crypto_vpmsumb, v16i8>;
+def VPMSUMH : VX1_Int_Ty<1096, "vpmsumh",
+ int_ppc_altivec_crypto_vpmsumh, v8i16>;
+def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw",
+ int_ppc_altivec_crypto_vpmsumw, v4i32>;
+def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd",
+ int_ppc_altivec_crypto_vpmsumd, v2i64>;
+def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
+ int_ppc_altivec_crypto_vpermxor, v16i8>;
+
+// Vector doubleword integer pack and unpack.
+def VPKSDSS : VX1_Int_Ty2<1486, "vpksdss", int_ppc_altivec_vpksdss,
+ v4i32, v2i64>;
+def VPKSDUS : VX1_Int_Ty2<1358, "vpksdus", int_ppc_altivec_vpksdus,
+ v4i32, v2i64>;
+def VPKUDUM : VXForm_1<1102, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vpkudum $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD,
+ (vpkudum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUDUS : VX1_Int_Ty2<1230, "vpkudus", int_ppc_altivec_vpkudus,
+ v4i32, v2i64>;
+def VUPKHSW : VX2_Int_Ty2<1614, "vupkhsw", int_ppc_altivec_vupkhsw,
+ v2i64, v4i32>;
+def VUPKLSW : VX2_Int_Ty2<1742, "vupklsw", int_ppc_altivec_vupklsw,
+ v2i64, v4i32>;
+
+// Shuffle patterns for unary and swapped (LE) vector pack modulo.
+def:Pat<(vpkudum_unary_shuffle v16i8:$vA, undef),
+ (VPKUDUM $vA, $vA)>;
+def:Pat<(vpkudum_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VPKUDUM $vB, $vA)>;
+
+
+} // end HasP8Altivec
+
+// Crypto instructions (from builtins)
+let Predicates = [HasP8Crypto] in {
+def VSHASIGMAW : VXCR_Int_Ty<1666, "vshasigmaw",
+ int_ppc_altivec_crypto_vshasigmaw, v4i32>;
+def VSHASIGMAD : VXCR_Int_Ty<1730, "vshasigmad",
+ int_ppc_altivec_crypto_vshasigmad, v2i64>;
+def VCIPHER : VX1_Int_Ty<1288, "vcipher", int_ppc_altivec_crypto_vcipher,
+ v2i64>;
+def VCIPHERLAST : VX1_Int_Ty<1289, "vcipherlast",
+ int_ppc_altivec_crypto_vcipherlast, v2i64>;
+def VNCIPHER : VX1_Int_Ty<1352, "vncipher",
+ int_ppc_altivec_crypto_vncipher, v2i64>;
+def VNCIPHERLAST : VX1_Int_Ty<1353, "vncipherlast",
+ int_ppc_altivec_crypto_vncipherlast, v2i64>;
+def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
+} // HasP8Crypto
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 186d5dce5705..4e03ed27653f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -562,6 +562,47 @@ class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+// Used for QPX
+class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_19<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_18<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRA = 0;
+}
+
+class XForm_20<bits<6> opcode, bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRB;
+ bits<4> tttt;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-24} = tttt;
+ let Inst{25-30} = xo;
+ let Inst{31} = 0;
+}
+
class XForm_24<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
@@ -652,6 +693,60 @@ class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let A = 0;
}
+class XForm_htm0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit R;
+
+ bit RC = 1;
+
+ let Inst{6-9} = 0;
+ let Inst{10} = R;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit A;
+
+ bit RC = 1;
+
+ let Inst{6} = A;
+ let Inst{7-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit L;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{7-9} = 0;
+ let Inst{10} = L;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+
+ bit RC = 0;
+
+ let Inst{6-8} = BF;
+ let Inst{9-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
// XX*-Form (VSX)
class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -669,6 +764,12 @@ class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = XT{5};
}
+class XX1_RS6_RD5_XO<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XX1Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let B = 0;
+}
+
class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
@@ -835,6 +936,21 @@ class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class DCB_Form_hint<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<5> TH;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = TH;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
// DSS_Form - Form X instruction, used for altivec dss* instructions.
class DSS_Form<bits<1> T, bits<10> xo, dag OOL, dag IOL, string asmstr,
@@ -945,6 +1061,38 @@ class XLForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class XLForm_4<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bit W;
+ bits<4> U;
+
+ bit RC = 0;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-14} = 0;
+ let Inst{15} = W;
+ let Inst{16-19} = U;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XLForm_S<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<1> S;
+
+ let Pattern = pattern;
+
+ let Inst{6-19} = 0;
+ let Inst{20} = S;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
class XLForm_2_and_DSForm_1<bits<6> opcode1, bits<10> xo1, bit lk,
bits<6> opcode2, bits<2> xo2,
dag OOL, dag IOL, string asmstr,
@@ -1023,6 +1171,19 @@ class XFXForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class XFXForm_3p<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<10> Entry;
+ let Pattern = pattern;
+
+ let Inst{6-10} = RT;
+ let Inst{11-20} = Entry;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
@@ -1081,6 +1242,25 @@ class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = RC;
}
+class XFLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag>pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit L;
+ bits<8> FLM;
+ bit W;
+ bits<5> FRB;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+
+ let Inst{6} = L;
+ let Inst{7-14} = FLM;
+ let Inst{15} = W;
+ let Inst{16-20} = FRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
// 1.7.10 XS-Form - SRADI.
class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -1177,6 +1357,14 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+// Used for QPX
+class AForm_4a<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRA = 0;
+ let FRC = 0;
+}
+
// 1.7.13 M-Form
class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -1383,6 +1571,39 @@ class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
let Inst{21-31} = xo;
}
+/// VXForm_CR - VX crypto instructions with "VRT, VRA, ST, SIX"
+class VXForm_CR<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<1> ST;
+ bits<4> SIX;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16} = ST;
+ let Inst{17-20} = SIX;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_BX - VX crypto instructions with "VRT, VRA, 0 - like vsbox"
+class VXForm_BX<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
// E-4 VXR-Form
class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -1401,6 +1622,49 @@ class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{22-31} = xo;
}
+// Z23-Form (used by QPX)
+class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRB;
+ bits<2> idx;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-22} = idx;
+ let Inst{23-30} = xo;
+ let Inst{31} = RC;
+}
+
+class Z23Form_2<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : Z23Form_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRB = 0;
+}
+
+class Z23Form_3<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<12> idx;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FRT;
+ let Inst{11-22} = idx;
+ let Inst{23-30} = xo;
+ let Inst{31} = RC;
+}
+
//===----------------------------------------------------------------------===//
class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
: I<0, OOL, IOL, asmstr, NoItinerary> {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
new file mode 100644
index 000000000000..6c4e2129087c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -0,0 +1,172 @@
+//===-- PPCInstrHTM.td - The PowerPC Hardware Transactional Memory -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hardware Transactional Memory extension to the
+// PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+def HasHTM : Predicate<"PPCSubTarget->hasHTM()">;
+
+def HTM_get_imm : SDNodeXForm<imm, [{
+ return getI32Imm (N->getZExtValue(), SDLoc(N));
+}]>;
+
+let hasSideEffects = 1, usesCustomInserter = 1 in {
+def TCHECK_RET : Pseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
+}
+
+
+let Predicates = [HasHTM] in {
+
+def TBEGIN : XForm_htm0 <31, 654,
+ (outs crrc0:$ret), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>;
+
+def TEND : XForm_htm1 <31, 686,
+ (outs crrc0:$ret), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>;
+
+def TABORT : XForm_base_r3xo <31, 910,
+ (outs crrc0:$ret), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
+ []>, isDOT {
+ let RST = 0;
+ let B = 0;
+}
+
+def TABORTWC : XForm_base_r3xo <31, 782,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+ "tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTWCI : XForm_base_r3xo <31, 846,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+ "tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTDC : XForm_base_r3xo <31, 814,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+ "tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTDCI : XForm_base_r3xo <31, 878,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+ "tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TSR : XForm_htm2 <31, 750,
+ (outs crrc0:$ret), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TCHECK : XForm_htm3 <31, 718,
+ (outs), (ins crrc:$BF), "tcheck $BF", IIC_SprMTSPR, []>;
+
+
+def TRECLAIM : XForm_base_r3xo <31, 942,
+ (outs crrc:$ret), (ins gprc:$A), "treclaim. $A",
+ IIC_SprMTSPR, []>,
+ isDOT {
+ let RST = 0;
+ let B = 0;
+}
+
+def TRECHKPT : XForm_base_r3xo <31, 1006,
+ (outs crrc:$ret), (ins), "trechkpt.", IIC_SprMTSPR, []>,
+ isDOT {
+ let RST = 0;
+ let A = 0;
+ let B = 0;
+}
+
+// Builtins
+
+// All HTM instructions, with the exception of tcheck, set CR0 with the
+// value of the MSR Transaction State (TS) bits that exist before the
+// instruction is executed. For tbegin., the EQ bit in CR0 can be used
+// to determine whether the transaction was successfully started (0) or
+// failed (1). We use an XORI pattern to 'flip' the bit to match the
+// tbegin builtin API which defines a return value of 1 as success.
+
+def : Pat<(int_ppc_tbegin i32:$R),
+ (XORI
+ (EXTRACT_SUBREG (
+ TBEGIN (HTM_get_imm imm:$R)), sub_eq),
+ 1)>;
+
+def : Pat<(int_ppc_tend i32:$R),
+ (TEND (HTM_get_imm imm:$R))>;
+
+
+def : Pat<(int_ppc_tabort i32:$R),
+ (TABORT $R)>;
+
+def : Pat<(int_ppc_tabortwc i32:$TO, i32:$RA, i32:$RB),
+ (TABORTWC (HTM_get_imm imm:$TO), $RA, $RB)>;
+
+def : Pat<(int_ppc_tabortwci i32:$TO, i32:$RA, i32:$SI),
+ (TABORTWCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>;
+
+def : Pat<(int_ppc_tabortdc i32:$TO, i32:$RA, i32:$RB),
+ (TABORTDC (HTM_get_imm imm:$TO), $RA, $RB)>;
+
+def : Pat<(int_ppc_tabortdci i32:$TO, i32:$RA, i32:$SI),
+ (TABORTDCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>;
+
+def : Pat<(int_ppc_tcheck),
+ (TCHECK_RET)>;
+
+def : Pat<(int_ppc_treclaim i32:$RA),
+ (TRECLAIM $RA)>;
+
+def : Pat<(int_ppc_trechkpt),
+ (TRECHKPT)>;
+
+def : Pat<(int_ppc_tsr i32:$L),
+ (TSR (HTM_get_imm imm:$L))>;
+
+def : Pat<(int_ppc_get_texasr),
+ (MFSPR8 130)>;
+
+def : Pat<(int_ppc_get_texasru),
+ (MFSPR8 131)>;
+
+def : Pat<(int_ppc_get_tfhar),
+ (MFSPR8 128)>;
+
+def : Pat<(int_ppc_get_tfiar),
+ (MFSPR8 129)>;
+
+
+def : Pat<(int_ppc_set_texasr i64:$V),
+ (MTSPR8 130, $V)>;
+
+def : Pat<(int_ppc_set_texasru i64:$V),
+ (MTSPR8 131, $V)>;
+
+def : Pat<(int_ppc_set_tfhar i64:$V),
+ (MTSPR8 128, $V)>;
+
+def : Pat<(int_ppc_set_tfiar i64:$V),
+ (MTSPR8 129, $V)>;
+
+
+// Extended mnemonics
+def : Pat<(int_ppc_tendall),
+ (TEND 1)>;
+
+def : Pat<(int_ppc_tresume),
+ (TSR 1)>;
+
+def : Pat<(int_ppc_tsuspend),
+ (TSR 0)>;
+
+def : Pat<(i64 (int_ppc_ttest)),
+ (RLDICL (i64 (COPY (TABORTWCI 0, ZERO, 0))), 36, 28)>;
+
+} // [HasHTM]
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 2492229dc319..b4bb50c80937 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -52,9 +53,6 @@ opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
cl::desc("Disable compare instruction optimization"), cl::Hidden);
-static cl::opt<bool> DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation",
-cl::desc("Disable VSX FMA instruction mutation"), cl::Hidden);
-
static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
cl::Hidden);
@@ -64,7 +62,7 @@ void PPCInstrInfo::anchor() {}
PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
: PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
- Subtarget(STI), RI(STI) {}
+ Subtarget(STI), RI(STI.getTargetMachine()) {}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
@@ -85,11 +83,11 @@ PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
/// to use for this target when scheduling the DAG.
-ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
- const InstrItineraryData *II,
- const ScheduleDAG *DAG) const {
+ScheduleHazardRecognizer *
+PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
unsigned Directive =
- DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8)
return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
@@ -116,9 +114,8 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
unsigned Reg = DefMO.getReg();
- const TargetRegisterInfo *TRI = &getRegisterInfo();
bool IsRegCR;
- if (TRI->isVirtualRegister(Reg)) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
const MachineRegisterInfo *MRI =
&DefMI->getParent()->getParent()->getRegInfo();
IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
@@ -184,6 +181,9 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
case PPC::RESTORE_CRBIT:
case PPC::LVX:
case PPC::LXVD2X:
+ case PPC::QVLFDX:
+ case PPC::QVLFSXs:
+ case PPC::QVLFDXb:
case PPC::RESTORE_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
@@ -210,6 +210,9 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
case PPC::SPILL_CRBIT:
case PPC::STVX:
case PPC::STXVD2X:
+ case PPC::QVSTFDX:
+ case PPC::QVSTFSXs:
+ case PPC::QVSTFDXb:
case PPC::SPILL_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
@@ -694,6 +697,33 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
.addReg(Cond[1].getReg(), 0, SubIdx);
}
+static unsigned getCRBitValue(unsigned CRBit) {
+ unsigned Ret = 4;
+ if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
+ CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
+ CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
+ CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
+ Ret = 3;
+ if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
+ CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
+ CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
+ CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
+ Ret = 2;
+ if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
+ CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
+ CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
+ CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
+ Ret = 1;
+ if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
+ CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
+ CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
+ CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
+ Ret = 0;
+
+ assert(Ret != 4 && "Invalid CR bit register");
+ return Ret;
+}
+
void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -702,7 +732,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// legalization. Promote them here.
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (PPC::F8RCRegClass.contains(DestReg) &&
- PPC::VSLRCRegClass.contains(SrcReg)) {
+ PPC::VSRCRegClass.contains(SrcReg)) {
unsigned SuperReg =
TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
@@ -711,7 +741,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
DestReg = SuperReg;
} else if (PPC::VRRCRegClass.contains(DestReg) &&
- PPC::VSHRCRegClass.contains(SrcReg)) {
+ PPC::VSRCRegClass.contains(SrcReg)) {
unsigned SuperReg =
TRI->getMatchingSuperReg(DestReg, PPC::sub_128, &PPC::VSRCRegClass);
@@ -720,7 +750,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
DestReg = SuperReg;
} else if (PPC::F8RCRegClass.contains(SrcReg) &&
- PPC::VSLRCRegClass.contains(DestReg)) {
+ PPC::VSRCRegClass.contains(DestReg)) {
unsigned SuperReg =
TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
@@ -729,7 +759,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SrcReg = SuperReg;
} else if (PPC::VRRCRegClass.contains(SrcReg) &&
- PPC::VSHRCRegClass.contains(DestReg)) {
+ PPC::VSRCRegClass.contains(DestReg)) {
unsigned SuperReg =
TRI->getMatchingSuperReg(SrcReg, PPC::sub_128, &PPC::VSRCRegClass);
@@ -739,6 +769,32 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SrcReg = SuperReg;
}
+ // Different class register copy
+ if (PPC::CRBITRCRegClass.contains(SrcReg) &&
+ PPC::GPRCRegClass.contains(DestReg)) {
+ unsigned CRReg = getCRFromCRBit(SrcReg);
+ BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg)
+ .addReg(CRReg), getKillRegState(KillSrc);
+ // Rotate the CR bit in the CR fields to be the least significant bit and
+ // then mask with 0x1 (MB = ME = 31).
+ BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
+ .addImm(31)
+ .addImm(31);
+ return;
+ } else if (PPC::CRRCRegClass.contains(SrcReg) &&
+ PPC::G8RCRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(PPC::MFOCRF8), DestReg)
+ .addReg(SrcReg), getKillRegState(KillSrc);
+ return;
+ } else if (PPC::CRRCRegClass.contains(SrcReg) &&
+ PPC::GPRCRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg)
+ .addReg(SrcReg), getKillRegState(KillSrc);
+ return;
+ }
+
unsigned Opc;
if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::OR;
@@ -760,8 +816,15 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// copies are generated, they are close enough to some use that the
// lower-latency form is preferable.
Opc = PPC::XXLOR;
- else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg))
+ else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
+ PPC::VSSRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::XXLORf;
+ else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::QVFMR;
+ else if (PPC::QSRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::QVFMRs;
+ else if (PPC::QBRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::QVFMRb;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
else
@@ -839,6 +902,12 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
+ } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSSPX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
assert(Subtarget.isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
@@ -847,6 +916,24 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
SpillsVRS = true;
+ } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFSXs))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDXb))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -934,6 +1021,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSDX), DestReg),
FrameIdx));
NonRI = true;
+ } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSSPX), DestReg),
+ FrameIdx));
+ NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
assert(Subtarget.isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
@@ -942,6 +1033,18 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
DestReg),
FrameIdx));
SpillsVRS = true;
+ } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDX), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFSXs), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDXb), DestReg),
+ FrameIdx));
+ NonRI = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -1608,672 +1711,3 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
}
}
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "ppc-vsx-fma-mutate"
-
-namespace {
- // PPCVSXFMAMutate pass - For copies between VSX registers and non-VSX registers
- // (Altivec and scalar floating-point registers), we need to transform the
- // copies into subregister copies with other restrictions.
- struct PPCVSXFMAMutate : public MachineFunctionPass {
- static char ID;
- PPCVSXFMAMutate() : MachineFunctionPass(ID) {
- initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
- }
-
- LiveIntervals *LIS;
-
- const PPCTargetMachine *TM;
- const PPCInstrInfo *TII;
-
-protected:
- bool processBlock(MachineBasicBlock &MBB) {
- bool Changed = false;
-
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
- for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
- I != IE; ++I) {
- MachineInstr *MI = I;
-
- // The default (A-type) VSX FMA form kills the addend (it is taken from
- // the target register, which is then updated to reflect the result of
- // the FMA). If the instruction, however, kills one of the registers
- // used for the product, then we can use the M-form instruction (which
- // will take that value from the to-be-defined register).
-
- int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode());
- if (AltOpc == -1)
- continue;
-
- // This pass is run after register coalescing, and so we're looking for
- // a situation like this:
- // ...
- // %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9
- // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16,
- // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16
- // ...
- // %vreg9<def,tied1> = XSMADDADP %vreg9<tied0>, %vreg17, %vreg19,
- // %RM<imp-use>; VSLRC:%vreg9,%vreg17,%vreg19
- // ...
- // Where we can eliminate the copy by changing from the A-type to the
- // M-type instruction. Specifically, for this example, this means:
- // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16,
- // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16
- // is replaced by:
- // %vreg16<def,tied1> = XSMADDMDP %vreg16<tied0>, %vreg18, %vreg9,
- // %RM<imp-use>; VSLRC:%vreg16,%vreg18,%vreg9
- // and we remove: %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9
-
- SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
-
- VNInfo *AddendValNo =
- LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn();
- MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
-
- // The addend and this instruction must be in the same block.
-
- if (!AddendMI || AddendMI->getParent() != MI->getParent())
- continue;
-
- // The addend must be a full copy within the same register class.
-
- if (!AddendMI->isFullCopy())
- continue;
-
- unsigned AddendSrcReg = AddendMI->getOperand(1).getReg();
- if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) {
- if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
- MRI.getRegClass(AddendSrcReg))
- continue;
- } else {
- // If AddendSrcReg is a physical register, make sure the destination
- // register class contains it.
- if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
- ->contains(AddendSrcReg))
- continue;
- }
-
- // In theory, there could be other uses of the addend copy before this
- // fma. We could deal with this, but that would require additional
- // logic below and I suspect it will not occur in any relevant
- // situations. Additionally, check whether the copy source is killed
- // prior to the fma. In order to replace the addend here with the
- // source of the copy, it must still be live here. We can't use
- // interval testing for a physical register, so as long as we're
- // walking the MIs we may as well test liveness here.
- bool OtherUsers = false, KillsAddendSrc = false;
- for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
- J != JE; --J) {
- if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
- OtherUsers = true;
- break;
- }
- if (J->modifiesRegister(AddendSrcReg, TRI) ||
- J->killsRegister(AddendSrcReg, TRI)) {
- KillsAddendSrc = true;
- break;
- }
- }
-
- if (OtherUsers || KillsAddendSrc)
- continue;
-
- // Find one of the product operands that is killed by this instruction.
-
- unsigned KilledProdOp = 0, OtherProdOp = 0;
- if (LIS->getInterval(MI->getOperand(2).getReg())
- .Query(FMAIdx).isKill()) {
- KilledProdOp = 2;
- OtherProdOp = 3;
- } else if (LIS->getInterval(MI->getOperand(3).getReg())
- .Query(FMAIdx).isKill()) {
- KilledProdOp = 3;
- OtherProdOp = 2;
- }
-
- // If there are no killed product operands, then this transformation is
- // likely not profitable.
- if (!KilledProdOp)
- continue;
-
- // For virtual registers, verify that the addend source register
- // is live here (as should have been assured above).
- assert((!TargetRegisterInfo::isVirtualRegister(AddendSrcReg) ||
- LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) &&
- "Addend source register is not live!");
-
- // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
-
- unsigned AddReg = AddendMI->getOperand(1).getReg();
- unsigned KilledProdReg = MI->getOperand(KilledProdOp).getReg();
- unsigned OtherProdReg = MI->getOperand(OtherProdOp).getReg();
-
- unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
- unsigned KilledProdSubReg = MI->getOperand(KilledProdOp).getSubReg();
- unsigned OtherProdSubReg = MI->getOperand(OtherProdOp).getSubReg();
-
- bool AddRegKill = AddendMI->getOperand(1).isKill();
- bool KilledProdRegKill = MI->getOperand(KilledProdOp).isKill();
- bool OtherProdRegKill = MI->getOperand(OtherProdOp).isKill();
-
- bool AddRegUndef = AddendMI->getOperand(1).isUndef();
- bool KilledProdRegUndef = MI->getOperand(KilledProdOp).isUndef();
- bool OtherProdRegUndef = MI->getOperand(OtherProdOp).isUndef();
-
- unsigned OldFMAReg = MI->getOperand(0).getReg();
-
- // The transformation doesn't work well with things like:
- // %vreg5 = A-form-op %vreg5, %vreg11, %vreg5;
- // so leave such things alone.
- if (OldFMAReg == KilledProdReg)
- continue;
-
- assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
- "Addend copy not tied to old FMA output!");
-
- DEBUG(dbgs() << "VSX FMA Mutation:\n " << *MI;);
-
- MI->getOperand(0).setReg(KilledProdReg);
- MI->getOperand(1).setReg(KilledProdReg);
- MI->getOperand(3).setReg(AddReg);
- MI->getOperand(2).setReg(OtherProdReg);
-
- MI->getOperand(0).setSubReg(KilledProdSubReg);
- MI->getOperand(1).setSubReg(KilledProdSubReg);
- MI->getOperand(3).setSubReg(AddSubReg);
- MI->getOperand(2).setSubReg(OtherProdSubReg);
-
- MI->getOperand(1).setIsKill(KilledProdRegKill);
- MI->getOperand(3).setIsKill(AddRegKill);
- MI->getOperand(2).setIsKill(OtherProdRegKill);
-
- MI->getOperand(1).setIsUndef(KilledProdRegUndef);
- MI->getOperand(3).setIsUndef(AddRegUndef);
- MI->getOperand(2).setIsUndef(OtherProdRegUndef);
-
- MI->setDesc(TII->get(AltOpc));
-
- DEBUG(dbgs() << " -> " << *MI);
-
- // The killed product operand was killed here, so we can reuse it now
- // for the result of the fma.
-
- LiveInterval &FMAInt = LIS->getInterval(OldFMAReg);
- VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot());
- for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end();
- UI != UE;) {
- MachineOperand &UseMO = *UI;
- MachineInstr *UseMI = UseMO.getParent();
- ++UI;
-
- // Don't replace the result register of the copy we're about to erase.
- if (UseMI == AddendMI)
- continue;
-
- UseMO.setReg(KilledProdReg);
- UseMO.setSubReg(KilledProdSubReg);
- }
-
- // Extend the live intervals of the killed product operand to hold the
- // fma result.
-
- LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg);
- for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end();
- AI != AE; ++AI) {
- // Don't add the segment that corresponds to the original copy.
- if (AI->valno == AddendValNo)
- continue;
-
- VNInfo *NewFMAValNo =
- NewFMAInt.getNextValue(AI->start,
- LIS->getVNInfoAllocator());
-
- NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end,
- NewFMAValNo));
- }
- DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
-
- FMAInt.removeValNo(FMAValNo);
- DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
-
- // Remove the (now unused) copy.
-
- DEBUG(dbgs() << " removing: " << *AddendMI << '\n');
- LIS->RemoveMachineInstrFromMaps(AddendMI);
- AddendMI->eraseFromParent();
-
- Changed = true;
- }
-
- return Changed;
- }
-
-public:
- bool runOnMachineFunction(MachineFunction &MF) override {
- TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
- // If we don't have VSX then go ahead and return without doing
- // anything.
- if (!TM->getSubtargetImpl()->hasVSX())
- return false;
-
- LIS = &getAnalysis<LiveIntervals>();
-
- TII = TM->getSubtargetImpl()->getInstrInfo();
-
- bool Changed = false;
-
- if (DisableVSXFMAMutate)
- return Changed;
-
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
- if (processBlock(B))
- Changed = true;
- }
-
- return Changed;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
- AU.addRequired<SlotIndexes>();
- AU.addPreserved<SlotIndexes>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-}
-
-INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE,
- "PowerPC VSX FMA Mutation", false, false)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE,
- "PowerPC VSX FMA Mutation", false, false)
-
-char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
-
-char PPCVSXFMAMutate::ID = 0;
-FunctionPass*
-llvm::createPPCVSXFMAMutatePass() { return new PPCVSXFMAMutate(); }
-
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "ppc-vsx-copy"
-
-namespace llvm {
- void initializePPCVSXCopyPass(PassRegistry&);
-}
-
-namespace {
- // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
- // (Altivec and scalar floating-point registers), we need to transform the
- // copies into subregister copies with other restrictions.
- struct PPCVSXCopy : public MachineFunctionPass {
- static char ID;
- PPCVSXCopy() : MachineFunctionPass(ID) {
- initializePPCVSXCopyPass(*PassRegistry::getPassRegistry());
- }
-
- const PPCTargetMachine *TM;
- const PPCInstrInfo *TII;
-
- bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
- MachineRegisterInfo &MRI) {
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- return RC->hasSubClassEq(MRI.getRegClass(Reg));
- } else if (RC->contains(Reg)) {
- return true;
- }
-
- return false;
- }
-
- bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) {
- return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI);
- }
-
- bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) {
- return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI);
- }
-
- bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) {
- return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
- }
-
-protected:
- bool processBlock(MachineBasicBlock &MBB) {
- bool Changed = false;
-
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
- I != IE; ++I) {
- MachineInstr *MI = I;
- if (!MI->isFullCopy())
- continue;
-
- MachineOperand &DstMO = MI->getOperand(0);
- MachineOperand &SrcMO = MI->getOperand(1);
-
- if ( IsVSReg(DstMO.getReg(), MRI) &&
- !IsVSReg(SrcMO.getReg(), MRI)) {
- // This is a copy *to* a VSX register from a non-VSX register.
- Changed = true;
-
- const TargetRegisterClass *SrcRC =
- IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
- &PPC::VSLRCRegClass;
- assert((IsF8Reg(SrcMO.getReg(), MRI) ||
- IsVRReg(SrcMO.getReg(), MRI)) &&
- "Unknown source for a VSX copy");
-
- unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
- BuildMI(MBB, MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
- .addImm(1) // add 1, not 0, because there is no implicit clearing
- // of the high bits.
- .addOperand(SrcMO)
- .addImm(IsVRReg(SrcMO.getReg(), MRI) ? PPC::sub_128 :
- PPC::sub_64);
-
- // The source of the original copy is now the new virtual register.
- SrcMO.setReg(NewVReg);
- } else if (!IsVSReg(DstMO.getReg(), MRI) &&
- IsVSReg(SrcMO.getReg(), MRI)) {
- // This is a copy *from* a VSX register to a non-VSX register.
- Changed = true;
-
- const TargetRegisterClass *DstRC =
- IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
- &PPC::VSLRCRegClass;
- assert((IsF8Reg(DstMO.getReg(), MRI) ||
- IsVRReg(DstMO.getReg(), MRI)) &&
- "Unknown destination for a VSX copy");
-
- // Copy the VSX value into a new VSX register of the correct subclass.
- unsigned NewVReg = MRI.createVirtualRegister(DstRC);
- BuildMI(MBB, MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), NewVReg)
- .addOperand(SrcMO);
-
- // Transform the original copy into a subregister extraction copy.
- SrcMO.setReg(NewVReg);
- SrcMO.setSubReg(IsVRReg(DstMO.getReg(), MRI) ? PPC::sub_128 :
- PPC::sub_64);
- }
- }
-
- return Changed;
- }
-
-public:
- bool runOnMachineFunction(MachineFunction &MF) override {
- TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
- // If we don't have VSX on the subtarget, don't do anything.
- if (!TM->getSubtargetImpl()->hasVSX())
- return false;
- TII = TM->getSubtargetImpl()->getInstrInfo();
-
- bool Changed = false;
-
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
- if (processBlock(B))
- Changed = true;
- }
-
- return Changed;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-}
-
-INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
- "PowerPC VSX Copy Legalization", false, false)
-
-char PPCVSXCopy::ID = 0;
-FunctionPass*
-llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
-
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "ppc-vsx-copy-cleanup"
-
-namespace llvm {
- void initializePPCVSXCopyCleanupPass(PassRegistry&);
-}
-
-namespace {
- // PPCVSXCopyCleanup pass - We sometimes end up generating self copies of VSX
- // registers (mostly because the ABI code still places all values into the
- // "traditional" floating-point and vector registers). Remove them here.
- struct PPCVSXCopyCleanup : public MachineFunctionPass {
- static char ID;
- PPCVSXCopyCleanup() : MachineFunctionPass(ID) {
- initializePPCVSXCopyCleanupPass(*PassRegistry::getPassRegistry());
- }
-
- const PPCTargetMachine *TM;
- const PPCInstrInfo *TII;
-
-protected:
- bool processBlock(MachineBasicBlock &MBB) {
- bool Changed = false;
-
- SmallVector<MachineInstr *, 4> ToDelete;
- for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
- I != IE; ++I) {
- MachineInstr *MI = I;
- if (MI->getOpcode() == PPC::XXLOR &&
- MI->getOperand(0).getReg() == MI->getOperand(1).getReg() &&
- MI->getOperand(0).getReg() == MI->getOperand(2).getReg())
- ToDelete.push_back(MI);
- }
-
- if (!ToDelete.empty())
- Changed = true;
-
- for (unsigned i = 0, ie = ToDelete.size(); i != ie; ++i) {
- DEBUG(dbgs() << "Removing VSX self-copy: " << *ToDelete[i]);
- ToDelete[i]->eraseFromParent();
- }
-
- return Changed;
- }
-
-public:
- bool runOnMachineFunction(MachineFunction &MF) override {
- TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
- // If we don't have VSX don't bother doing anything here.
- if (!TM->getSubtargetImpl()->hasVSX())
- return false;
- TII = TM->getSubtargetImpl()->getInstrInfo();
-
- bool Changed = false;
-
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
- if (processBlock(B))
- Changed = true;
- }
-
- return Changed;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-}
-
-INITIALIZE_PASS(PPCVSXCopyCleanup, DEBUG_TYPE,
- "PowerPC VSX Copy Cleanup", false, false)
-
-char PPCVSXCopyCleanup::ID = 0;
-FunctionPass*
-llvm::createPPCVSXCopyCleanupPass() { return new PPCVSXCopyCleanup(); }
-
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "ppc-early-ret"
-STATISTIC(NumBCLR, "Number of early conditional returns");
-STATISTIC(NumBLR, "Number of early returns");
-
-namespace llvm {
- void initializePPCEarlyReturnPass(PassRegistry&);
-}
-
-namespace {
- // PPCEarlyReturn pass - For simple functions without epilogue code, move
- // returns up, and create conditional returns, to avoid unnecessary
- // branch-to-blr sequences.
- struct PPCEarlyReturn : public MachineFunctionPass {
- static char ID;
- PPCEarlyReturn() : MachineFunctionPass(ID) {
- initializePPCEarlyReturnPass(*PassRegistry::getPassRegistry());
- }
-
- const PPCTargetMachine *TM;
- const PPCInstrInfo *TII;
-
-protected:
- bool processBlock(MachineBasicBlock &ReturnMBB) {
- bool Changed = false;
-
- MachineBasicBlock::iterator I = ReturnMBB.begin();
- I = ReturnMBB.SkipPHIsAndLabels(I);
-
- // The block must be essentially empty except for the blr.
- if (I == ReturnMBB.end() ||
- (I->getOpcode() != PPC::BLR && I->getOpcode() != PPC::BLR8) ||
- I != ReturnMBB.getLastNonDebugInstr())
- return Changed;
-
- SmallVector<MachineBasicBlock*, 8> PredToRemove;
- for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(),
- PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) {
- bool OtherReference = false, BlockChanged = false;
- for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) {
- if (J->getOpcode() == PPC::B) {
- if (J->getOperand(0).getMBB() == &ReturnMBB) {
- // This is an unconditional branch to the return. Replace the
- // branch with a blr.
- BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode()));
- MachineBasicBlock::iterator K = J--;
- K->eraseFromParent();
- BlockChanged = true;
- ++NumBLR;
- continue;
- }
- } else if (J->getOpcode() == PPC::BCC) {
- if (J->getOperand(2).getMBB() == &ReturnMBB) {
- // This is a conditional branch to the return. Replace the branch
- // with a bclr.
- BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR))
- .addImm(J->getOperand(0).getImm())
- .addReg(J->getOperand(1).getReg());
- MachineBasicBlock::iterator K = J--;
- K->eraseFromParent();
- BlockChanged = true;
- ++NumBCLR;
- continue;
- }
- } else if (J->getOpcode() == PPC::BC || J->getOpcode() == PPC::BCn) {
- if (J->getOperand(1).getMBB() == &ReturnMBB) {
- // This is a conditional branch to the return. Replace the branch
- // with a bclr.
- BuildMI(**PI, J, J->getDebugLoc(),
- TII->get(J->getOpcode() == PPC::BC ?
- PPC::BCLR : PPC::BCLRn))
- .addReg(J->getOperand(0).getReg());
- MachineBasicBlock::iterator K = J--;
- K->eraseFromParent();
- BlockChanged = true;
- ++NumBCLR;
- continue;
- }
- } else if (J->isBranch()) {
- if (J->isIndirectBranch()) {
- if (ReturnMBB.hasAddressTaken())
- OtherReference = true;
- } else
- for (unsigned i = 0; i < J->getNumOperands(); ++i)
- if (J->getOperand(i).isMBB() &&
- J->getOperand(i).getMBB() == &ReturnMBB)
- OtherReference = true;
- } else if (!J->isTerminator() && !J->isDebugValue())
- break;
-
- if (J == (*PI)->begin())
- break;
-
- --J;
- }
-
- if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB))
- OtherReference = true;
-
- // Predecessors are stored in a vector and can't be removed here.
- if (!OtherReference && BlockChanged) {
- PredToRemove.push_back(*PI);
- }
-
- if (BlockChanged)
- Changed = true;
- }
-
- for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i)
- PredToRemove[i]->removeSuccessor(&ReturnMBB);
-
- if (Changed && !ReturnMBB.hasAddressTaken()) {
- // We now might be able to merge this blr-only block into its
- // by-layout predecessor.
- if (ReturnMBB.pred_size() == 1 &&
- (*ReturnMBB.pred_begin())->isLayoutSuccessor(&ReturnMBB)) {
- // Move the blr into the preceding block.
- MachineBasicBlock &PrevMBB = **ReturnMBB.pred_begin();
- PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I);
- PrevMBB.removeSuccessor(&ReturnMBB);
- }
-
- if (ReturnMBB.pred_empty())
- ReturnMBB.eraseFromParent();
- }
-
- return Changed;
- }
-
-public:
- bool runOnMachineFunction(MachineFunction &MF) override {
- TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
- TII = TM->getSubtargetImpl()->getInstrInfo();
-
- bool Changed = false;
-
- // If the function does not have at least two blocks, then there is
- // nothing to do.
- if (MF.size() < 2)
- return Changed;
-
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
- if (processBlock(B))
- Changed = true;
- }
-
- return Changed;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-}
-
-INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE,
- "PowerPC Early-Return Creation", false, false)
-
-char PPCEarlyReturn::ID = 0;
-FunctionPass*
-llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 4add6f9781e8..7fd076a7d1cd 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -63,7 +63,7 @@ enum PPC970_Unit {
};
} // end namespace PPCII
-
+class PPCSubtarget;
class PPCInstrInfo : public PPCGenInstrInfo {
PPCSubtarget &Subtarget;
const PPCRegisterInfo RI;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index ad3bc1dbeeeb..c5a044ce85fd 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -46,13 +46,6 @@ def SDT_PPCstbrx : SDTypeProfile<0, 3, [
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
-def SDT_PPClarx : SDTypeProfile<1, 1, [
- SDTCisInt<0>, SDTCisPtrTy<1>
-]>;
-def SDT_PPCstcx : SDTypeProfile<0, 2, [
- SDTCisInt<0>, SDTCisPtrTy<1>
-]>;
-
def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisVT<1, i32>
]>;
@@ -61,6 +54,27 @@ def tocentry32 : Operand<iPTR> {
let MIOperandInfo = (ops i32imm:$imm);
}
+def SDT_PPCqvfperm : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVec<3>
+]>;
+def SDT_PPCqvgpci : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisInt<1>
+]>;
+def SDT_PPCqvaligni : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>
+]>;
+def SDT_PPCqvesplati : SDTypeProfile<1, 2, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
+]>;
+
+def SDT_PPCqbflt : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisVec<1>
+]>;
+
+def SDT_PPCqvlfsb : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisPtrTy<1>
+]>;
+
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
@@ -98,7 +112,8 @@ def PPCfsel : SDNode<"PPCISD::FSEL",
def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
-def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp,
+ [SDNPMayLoad, SDNPMemOperand]>;
def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
@@ -110,14 +125,33 @@ def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
+def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
+def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
+ SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
-def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
- [SDNPHasChain]>;
+def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
+def PPCaddiTlsldLAddr : SDNode<"PPCISD::ADDI_TLSLD_L_ADDR",
+ SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
+def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>;
def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
+def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>;
+def PPCqvaligni : SDNode<"PPCISD::QVALIGNI", SDT_PPCqvaligni, []>;
+def PPCqvesplati : SDNode<"PPCISD::QVESPLATI", SDT_PPCqvesplati, []>;
+
+def PPCqbflt : SDNode<"PPCISD::QBFLT", SDT_PPCqbflt, []>;
+
+def PPCqvlfsb : SDNode<"PPCISD::QVLFSb", SDT_PPCqvlfsb,
+ [SDNPHasChain, SDNPMayLoad]>;
+
def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>;
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
@@ -136,20 +170,9 @@ def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
- [SDNPHasChain, SDNPSideEffect,
- SDNPInGlue, SDNPOutGlue]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
@@ -178,6 +201,12 @@ def SDT_PPCsc : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def PPCsc : SDNode<"PPCISD::SC", SDT_PPCsc,
[SDNPHasChain, SDNPSideEffect]>;
+def PPCclrbhrb : SDNode<"PPCISD::CLRBHRB", SDTNone,
+ [SDNPHasChain, SDNPSideEffect]>;
+def PPCmfbhrbe : SDNode<"PPCISD::MFBHRBE", SDTIntBinOp, [SDNPHasChain]>;
+def PPCrfebb : SDNode<"PPCISD::RFEBB", SDT_PPCsc,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
@@ -195,18 +224,6 @@ def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone,
def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-// Instructions to support atomic operations
-def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
- [SDNPHasChain, SDNPMayLoad]>;
-def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
- [SDNPHasChain, SDNPMayStore]>;
-
-// Instructions to support medium and large code model
-def PPCaddisTocHA : SDNode<"PPCISD::ADDIS_TOC_HA", SDTIntBinOp, []>;
-def PPCldTocL : SDNode<"PPCISD::LD_TOC_L", SDTIntBinOp, [SDNPMayLoad]>;
-def PPCaddiTocL : SDNode<"PPCISD::ADDI_TOC_L", SDTIntBinOp, []>;
-
-
// Instructions to support dynamic alloca.
def SDTDynOp : SDTypeProfile<1, 2, []>;
def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
@@ -217,41 +234,42 @@ def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
def SHL32 : SDNodeXForm<imm, [{
// Transformation function: 31 - imm
- return getI32Imm(31 - N->getZExtValue());
+ return getI32Imm(31 - N->getZExtValue(), SDLoc(N));
}]>;
def SRL32 : SDNodeXForm<imm, [{
// Transformation function: 32 - imm
- return N->getZExtValue() ? getI32Imm(32 - N->getZExtValue()) : getI32Imm(0);
+ return N->getZExtValue() ? getI32Imm(32 - N->getZExtValue(), SDLoc(N))
+ : getI32Imm(0, SDLoc(N));
}]>;
def LO16 : SDNodeXForm<imm, [{
// Transformation function: get the low 16 bits.
- return getI32Imm((unsigned short)N->getZExtValue());
+ return getI32Imm((unsigned short)N->getZExtValue(), SDLoc(N));
}]>;
def HI16 : SDNodeXForm<imm, [{
// Transformation function: shift the immediate value down into the low bits.
- return getI32Imm((unsigned)N->getZExtValue() >> 16);
+ return getI32Imm((unsigned)N->getZExtValue() >> 16, SDLoc(N));
}]>;
def HA16 : SDNodeXForm<imm, [{
// Transformation function: shift the immediate value down into the low bits.
signed int Val = N->getZExtValue();
- return getI32Imm((Val - (signed short)Val) >> 16);
+ return getI32Imm((Val - (signed short)Val) >> 16, SDLoc(N));
}]>;
def MB : SDNodeXForm<imm, [{
// Transformation function: get the start bit of a mask
unsigned mb = 0, me;
(void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
- return getI32Imm(mb);
+ return getI32Imm(mb, SDLoc(N));
}]>;
def ME : SDNodeXForm<imm, [{
// Transformation function: get the end bit of a mask
unsigned mb, me = 0;
(void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
- return getI32Imm(me);
+ return getI32Imm(me, SDLoc(N));
}]>;
def maskimm32 : PatLeaf<(imm), [{
// maskImm predicate - True if immediate is a run of ones.
@@ -421,6 +439,18 @@ def PPCRegCRRCAsmOperand : AsmOperandClass {
def crrc : RegisterOperand<CRRC> {
let ParserMatchClass = PPCRegCRRCAsmOperand;
}
+def crrc0 : RegisterOperand<CRRC0> {
+ let ParserMatchClass = PPCRegCRRCAsmOperand;
+}
+
+def PPCU1ImmAsmOperand : AsmOperandClass {
+ let Name = "U1Imm"; let PredicateMethod = "isU1Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u1imm : Operand<i32> {
+ let PrintMethod = "printU1ImmOperand";
+ let ParserMatchClass = PPCU1ImmAsmOperand;
+}
def PPCU2ImmAsmOperand : AsmOperandClass {
let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
@@ -431,6 +461,15 @@ def u2imm : Operand<i32> {
let ParserMatchClass = PPCU2ImmAsmOperand;
}
+def PPCU3ImmAsmOperand : AsmOperandClass {
+ let Name = "U3Imm"; let PredicateMethod = "isU3Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u3imm : Operand<i32> {
+ let PrintMethod = "printU3ImmOperand";
+ let ParserMatchClass = PPCU3ImmAsmOperand;
+}
+
def PPCU4ImmAsmOperand : AsmOperandClass {
let Name = "U4Imm"; let PredicateMethod = "isU4Imm";
let RenderMethod = "addImmOperands";
@@ -466,6 +505,24 @@ def u6imm : Operand<i32> {
let ParserMatchClass = PPCU6ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<6>";
}
+def PPCU10ImmAsmOperand : AsmOperandClass {
+ let Name = "U10Imm"; let PredicateMethod = "isU10Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u10imm : Operand<i32> {
+ let PrintMethod = "printU10ImmOperand";
+ let ParserMatchClass = PPCU10ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<10>";
+}
+def PPCU12ImmAsmOperand : AsmOperandClass {
+ let Name = "U12Imm"; let PredicateMethod = "isU12Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u12imm : Operand<i32> {
+ let PrintMethod = "printU12ImmOperand";
+ let ParserMatchClass = PPCU12ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<12>";
+}
def PPCS16ImmAsmOperand : AsmOperandClass {
let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
let RenderMethod = "addS16ImmOperands";
@@ -681,6 +738,12 @@ def IsPPC4xx : Predicate<"PPCSubTarget->isPPC4xx()">;
def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">;
def IsE500 : Predicate<"PPCSubTarget->isE500()">;
def HasSPE : Predicate<"PPCSubTarget->HasSPE()">;
+def HasICBT : Predicate<"PPCSubTarget->hasICBT()">;
+def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">;
+def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
+def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">;
+def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">;
+def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
@@ -757,6 +820,23 @@ multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
}
}
+// Multiclass for instructions for which the non record form is not cracked
+// and the record form is cracked (i.e. divw, mullw, etc.)
+multiclass XOForm_1rcr<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel, PPC970_DGroup_First,
+ PPC970_DGroup_Cracked;
+ }
+}
+
multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
@@ -1292,6 +1372,24 @@ let PPC970_Unit = 7 in {
"sc $lev", IIC_BrB, [(PPCsc (i32 imm:$lev))]>;
}
+// Branch history rolling buffer.
+def CLRBHRB : XForm_0<31, 430, (outs), (ins), "clrbhrb", IIC_BrB,
+ [(PPCclrbhrb)]>,
+ PPC970_DGroup_Single;
+// The $dmy argument used for MFBHRBE is not needed; however, including
+// it avoids automatic generation of PPCFastISel::fastEmit_i(), which
+// interferes with necessary special handling (see PPCFastISel.cpp).
+def MFBHRBE : XFXForm_3p<31, 302, (outs gprc:$rD),
+ (ins u10imm:$imm, u10imm:$dmy),
+ "mfbhrbe $rD, $imm", IIC_BrB,
+ [(set i32:$rD,
+ (PPCmfbhrbe imm:$imm, imm:$dmy))]>,
+ PPC970_DGroup_First;
+
+def RFEBB : XLForm_S<19, 146, (outs), (ins u1imm:$imm), "rfebb $imm",
+ IIC_BrB, [(PPCrfebb (i32 imm:$imm))]>,
+ PPC970_DGroup_Single;
+
// DCB* instructions.
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst",
IIC_LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
@@ -1305,12 +1403,6 @@ def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst), "dcbi $dst",
def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst), "dcbst $dst",
IIC_LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst), "dcbt $dst",
- IIC_LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>,
- PPC970_DGroup_Single;
-def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst), "dcbtst $dst",
- IIC_LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>,
- PPC970_DGroup_Single;
def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst), "dcbz $dst",
IIC_LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
PPC970_DGroup_Single;
@@ -1318,15 +1410,29 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst",
IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
PPC970_DGroup_Single;
+let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in {
+def DCBT : DCB_Form_hint<278, (outs), (ins u5imm:$TH, memrr:$dst),
+ "dcbt $dst, $TH", IIC_LdStDCBF, []>,
+ PPC970_DGroup_Single;
+def DCBTST : DCB_Form_hint<246, (outs), (ins u5imm:$TH, memrr:$dst),
+ "dcbtst $dst, $TH", IIC_LdStDCBF, []>,
+ PPC970_DGroup_Single;
+} // hasSideEffects = 0
+
def ICBT : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src),
- "icbt $CT, $src", IIC_LdStLoad>, Requires<[IsBookE]>;
+ "icbt $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;
+
+def : Pat<(int_ppc_dcbt xoaddr:$dst),
+ (DCBT 0, xoaddr:$dst)>;
+def : Pat<(int_ppc_dcbtst xoaddr:$dst),
+ (DCBTST 0, xoaddr:$dst)>;
def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
- (DCBT xoaddr:$dst)>; // data prefetch for loads
+ (DCBT 0, xoaddr:$dst)>; // data prefetch for loads
def : Pat<(prefetch xoaddr:$dst, (i32 1), imm, (i32 1)),
- (DCBTST xoaddr:$dst)>; // data prefetch for stores
+ (DCBTST 0, xoaddr:$dst)>; // data prefetch for stores
def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)),
- (ICBT 0, xoaddr:$dst)>; // inst prefetch (for read)
+ (ICBT 0, xoaddr:$dst)>, Requires<[HasICBT]>; // inst prefetch (for read)
// Atomic operations
let usesCustomInserter = 1 in {
@@ -1409,15 +1515,44 @@ let usesCustomInserter = 1 in {
}
// Instructions to support atomic operations
+let mayLoad = 1, hasSideEffects = 0 in {
+def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
+ "lbarx $rD, $src", IIC_LdStLWARX, []>,
+ Requires<[HasPartwordAtomics]>;
+
+def LHARX : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src),
+ "lharx $rD, $src", IIC_LdStLWARX, []>,
+ Requires<[HasPartwordAtomics]>;
+
def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
- "lwarx $rD, $src", IIC_LdStLWARX,
- [(set i32:$rD, (PPClarx xoaddr:$src))]>;
+ "lwarx $rD, $src", IIC_LdStLWARX, []>;
+
+// Instructions to support lock versions of atomics
+// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
+def LBARXL : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
+ "lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
+ Requires<[HasPartwordAtomics]>;
+
+def LHARXL : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src),
+ "lharx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
+ Requires<[HasPartwordAtomics]>;
+
+def LWARXL : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
+ "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT;
+}
+
+let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in {
+def STBCX : XForm_1<31, 694, (outs), (ins gprc:$rS, memrr:$dst),
+ "stbcx. $rS, $dst", IIC_LdStSTWCX, []>,
+ isDOT, Requires<[HasPartwordAtomics]>;
+
+def STHCX : XForm_1<31, 726, (outs), (ins gprc:$rS, memrr:$dst),
+ "sthcx. $rS, $dst", IIC_LdStSTWCX, []>,
+ isDOT, Requires<[HasPartwordAtomics]>;
-let Defs = [CR0] in
def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
- "stwcx. $rS, $dst", IIC_LdStSTWCX,
- [(PPCstcx i32:$rS, xoaddr:$dst)]>,
- isDOT;
+ "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT;
+}
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>;
@@ -1436,7 +1571,7 @@ def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB),
//
// Unindexed (r+i) Loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src),
"lbz $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 iaddr:$src))]>;
@@ -1533,7 +1668,7 @@ def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
// Indexed (r+r) Loads.
//
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src),
"lbzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 xaddr:$src))]>;
@@ -2193,15 +2328,20 @@ let Uses = [RM], Defs = [RM] in {
def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
"mtfsb1 $FM", IIC_IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
- def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT),
- "mtfsf $FM, $rT", IIC_IntMTFSB0, []>,
- PPC970_DGroup_Single, PPC970_Unit_FPU;
+ let isCodeGenOnly = 1 in
+ def MTFSFb : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT),
+ "mtfsf $FM, $rT", IIC_IntMTFSB0, []>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
}
let Uses = [RM] in {
def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins),
"mffs $rT", IIC_IntMFFS,
[(set f64:$rT, (PPCmffs))]>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
+
+ let Defs = [CR1] in
+ def MFFSo : XForm_42<63, 583, (outs f8rc:$rT), (ins),
+ "mffs. $rT", IIC_IntMFFS, []>, isDOT;
}
@@ -2221,14 +2361,30 @@ defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
[(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
PPC970_DGroup_Cracked;
-defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divw", "$rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
-defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divwu", "$rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divw", "$rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>;
+defm DIVWU : XOForm_1rcr<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwu", "$rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>;
+def DIVWE : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwe $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>,
+ Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVWEo : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwe. $rT, $rA, $rB", IIC_IntDivW,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ Requires<[HasExtDiv]>;
+def DIVWEU : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divweu $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>,
+ Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVWEUo : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divweu. $rT, $rA, $rB", IIC_IntDivW,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ Requires<[HasExtDiv]>;
let isCommutable = 1 in {
defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"mulhw", "$rT, $rA, $rB", IIC_IntMulHW,
@@ -2377,7 +2533,7 @@ let PPC970_Unit = 1 in { // FXU Operations.
let isSelect = 1 in
def ISEL : AForm_4<31, 15,
(outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond),
- "isel $rT, $rA, $rB, $cond", IIC_IntGeneral,
+ "isel $rT, $rA, $rB, $cond", IIC_IntISEL,
[]>;
}
@@ -2459,9 +2615,6 @@ def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
def : Pat<(PPCcall (i32 texternalsym:$dst)),
(BL texternalsym:$dst)>;
-def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym),
- (BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
-
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
@@ -2516,10 +2669,49 @@ def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDItlsgdL32",
[(set i32:$rD,
(PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
+// LR is a true define, while the rest of the Defs are clobbers. R3 is
+// explicitly defined when this op is created, so not mentioned here.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+ "GETtlsADDR32",
+ [(set i32:$rD,
+ (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>;
+// Combined op for ADDItlsgdL32 and GETtlsADDR32, late expanded. R3 and LR
+// are true defines while the rest of the Defs are clobbers.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def ADDItlsgdLADDR32 : Pseudo<(outs gprc:$rD),
+ (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
+ "#ADDItlsgdLADDR32",
+ [(set i32:$rD,
+ (PPCaddiTlsgdLAddr i32:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>;
def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDItlsldL32",
[(set i32:$rD,
(PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
+// LR is a true define, while the rest of the Defs are clobbers. R3 is
+// explicitly defined when this op is created, so not mentioned here.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+ "GETtlsldADDR32",
+ [(set i32:$rD,
+ (PPCgetTlsldAddr i32:$reg,
+ tglobaltlsaddr:$sym))]>;
+// Combined op for ADDItlsldL32 and GETtlsADDR32, late expanded. R3 and LR
+// are true defines while the rest of the Defs are clobbers.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def ADDItlsldLADDR32 : Pseudo<(outs gprc:$rD),
+ (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
+ "#ADDItlsldLADDR32",
+ [(set i32:$rD,
+ (PPCaddiTlsldLAddr i32:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>;
def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDIdtprelL32",
[(set i32:$rD,
@@ -2604,6 +2796,8 @@ include "PPCInstrAltivec.td"
include "PPCInstrSPE.td"
include "PPCInstr64Bit.td"
include "PPCInstrVSX.td"
+include "PPCInstrQPX.td"
+include "PPCInstrHTM.td"
def crnot : OutPatFrag<(ops node:$in),
(CRNOR $in, $in)>;
@@ -3188,6 +3382,28 @@ def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins),
def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L),
"mtmsrd $RS, $L", IIC_SprMTMSRD>;
+def MCRFS : XLForm_3<63, 64, (outs crrc:$BF), (ins crrc:$BFA),
+ "mcrfs $BF, $BFA", IIC_BrMCR>;
+
+def MTFSFI : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W),
+ "mtfsfi $BF, $U, $W", IIC_IntMFFS>;
+
+def MTFSFIo : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W),
+ "mtfsfi. $BF, $U, $W", IIC_IntMFFS>, isDOT;
+
+def : InstAlias<"mtfsfi $BF, $U", (MTFSFI crrc:$BF, i32imm:$U, 0)>;
+def : InstAlias<"mtfsfi. $BF, $U", (MTFSFIo crrc:$BF, i32imm:$U, 0)>;
+
+def MTFSF : XFLForm_1<63, 711, (outs),
+ (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W),
+ "mtfsf $FLM, $FRB, $L, $W", IIC_IntMFFS, []>;
+def MTFSFo : XFLForm_1<63, 711, (outs),
+ (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W),
+ "mtfsf. $FLM, $FRB, $L, $W", IIC_IntMFFS, []>, isDOT;
+
+def : InstAlias<"mtfsf $FLM, $FRB", (MTFSF i32imm:$FLM, f8rc:$FRB, 0, 0)>;
+def : InstAlias<"mtfsf. $FLM, $FRB", (MTFSFo i32imm:$FLM, f8rc:$FRB, 0, 0)>;
+
def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB),
"slbie $RB", IIC_SprSLBIE, []>;
@@ -3302,7 +3518,7 @@ class PPCAsmPseudo<string asm, dag iops>
def : InstAlias<"sc", (SC 0)>;
def : InstAlias<"sync", (SYNC 0)>, Requires<[HasSYNC]>;
-def : InstAlias<"msync", (SYNC 0)>, Requires<[HasSYNC]>;
+def : InstAlias<"msync", (SYNC 0), 0>, Requires<[HasSYNC]>;
def : InstAlias<"lwsync", (SYNC 1)>, Requires<[HasSYNC]>;
def : InstAlias<"ptesync", (SYNC 2)>, Requires<[HasSYNC]>;
@@ -3312,6 +3528,17 @@ def : InstAlias<"waitimpl", (WAIT 2)>;
def : InstAlias<"mbar", (MBAR 0)>, Requires<[IsBookE]>;
+def DCBTx : PPCAsmPseudo<"dcbt $dst", (ins memrr:$dst)>;
+def DCBTSTx : PPCAsmPseudo<"dcbtst $dst", (ins memrr:$dst)>;
+
+def DCBTCT : PPCAsmPseudo<"dcbtct $dst, $TH", (ins memrr:$dst, u5imm:$TH)>;
+def DCBTDS : PPCAsmPseudo<"dcbtds $dst, $TH", (ins memrr:$dst, u5imm:$TH)>;
+def DCBTT : PPCAsmPseudo<"dcbtt $dst", (ins memrr:$dst)>;
+
+def DCBTSTCT : PPCAsmPseudo<"dcbtstct $dst, $TH", (ins memrr:$dst, u5imm:$TH)>;
+def DCBTSTDS : PPCAsmPseudo<"dcbtstds $dst, $TH", (ins memrr:$dst, u5imm:$TH)>;
+def DCBTSTT : PPCAsmPseudo<"dcbtstt $dst", (ins memrr:$dst)>;
+
def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
@@ -3544,6 +3771,9 @@ def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNMo gprc:$rA, gprc:$rS, gprc:$rB, 0,
def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
+def : InstAlias<"cntlz $rA, $rS", (CNTLZW gprc:$rA, gprc:$rS)>;
+def : InstAlias<"cntlz. $rA, $rS", (CNTLZWo gprc:$rA, gprc:$rS)>;
+
def EXTLDI : PPCAsmPseudo<"extldi $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
def EXTLDIo : PPCAsmPseudo<"extldi. $rA, $rS, $n, $b",
@@ -3584,6 +3814,19 @@ def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)
def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
+def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWINMobm : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWIMIbm : PPCAsmPseudo<"rlwimi $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWIMIobm : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWNMbm : PPCAsmPseudo<"rlwnm $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWNMobm : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+
// These generic branch instruction forms are used for the assembler parser only.
// Defs and Uses are conservative, since we don't know the BO value.
let PPC970_Unit = 7 in {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
new file mode 100644
index 000000000000..5c66b42690c3
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
@@ -0,0 +1,1192 @@
+//===- PPCInstrQPX.td - The PowerPC QPX Extension --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the QPX extension to the PowerPC instruction set.
+// Reference:
+// Book Q: QPX Architecture Definition. IBM (as updated in) 2011.
+//
+//===----------------------------------------------------------------------===//
+
+def PPCRegQFRCAsmOperand : AsmOperandClass {
+ let Name = "RegQFRC"; let PredicateMethod = "isRegNumber";
+}
+def qfrc : RegisterOperand<QFRC> {
+ let ParserMatchClass = PPCRegQFRCAsmOperand;
+}
+def PPCRegQSRCAsmOperand : AsmOperandClass {
+ let Name = "RegQSRC"; let PredicateMethod = "isRegNumber";
+}
+def qsrc : RegisterOperand<QSRC> {
+ let ParserMatchClass = PPCRegQSRCAsmOperand;
+}
+def PPCRegQBRCAsmOperand : AsmOperandClass {
+ let Name = "RegQBRC"; let PredicateMethod = "isRegNumber";
+}
+def qbrc : RegisterOperand<QBRC> {
+ let ParserMatchClass = PPCRegQBRCAsmOperand;
+}
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining instructions that directly correspond to intrinsics.
+
+// QPXA1_Int - A AForm_1 intrinsic definition.
+class QPXA1_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_FPFused,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+// QPXA1s_Int - A AForm_1 intrinsic definition (simple instructions).
+class QPXA1s_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_VecPerm,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+// QPXA2_Int - A AForm_2 intrinsic definition.
+class QPXA2_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_2<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>;
+// QPXA3_Int - A AForm_3 intrinsic definition.
+class QPXA3_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_3<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC),
+ !strconcat(opc, " $FRT, $FRA, $FRC"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRC))]>;
+// QPXA4_Int - A AForm_4a intrinsic definition.
+class QPXA4_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_4a<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRB))]>;
+// QPXX18_Int - A XForm_18 intrinsic definition.
+class QPXX18_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID>
+ : XForm_18<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPCompare,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>;
+// QPXX19_Int - A XForm_19 intrinsic definition.
+class QPXX19_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID>
+ : XForm_19<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRB))]>;
+
+//===----------------------------------------------------------------------===//
+// Pattern Frags.
+
+def extloadv4f32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4f32;
+}]>;
+
+def truncstorev4f32 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
+}]>;
+def pre_truncstv4f32 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (pre_truncst node:$val,
+ node:$base, node:$offset), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
+}]>;
+
+def fround_inexact : PatFrag<(ops node:$val), (fround node:$val), [{
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 0;
+}]>;
+
+def fround_exact : PatFrag<(ops node:$val), (fround node:$val), [{
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 1;
+}]>;
+
+let FastIselShouldIgnore = 1 in // FastIsel should ignore all u12 instrs.
+ def u12 : ImmLeaf<i32, [{ return (Imm & 0xFFF) == Imm; }]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+
+def HasQPX : Predicate<"PPCSubTarget->hasQPX()">;
+let Predicates = [HasQPX] in {
+let DecoderNamespace = "QPX" in {
+let hasSideEffects = 0 in { // QPX instructions don't have side effects.
+let Uses = [RM] in {
+ // Add Instructions
+ let isCommutable = 1 in {
+ def QVFADD : AForm_2<4, 21,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfadd $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fadd v4f64:$FRA, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFADDS : QPXA2_Int<0, 21, "qvfadds", int_ppc_qpx_qvfadds>;
+ def QVFADDSs : AForm_2<0, 21,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfadds $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fadd v4f32:$FRA, v4f32:$FRB))]>;
+ }
+ def QVFSUB : AForm_2<4, 20,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfsub $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fsub v4f64:$FRA, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFSUBS : QPXA2_Int<0, 20, "qvfsubs", int_ppc_qpx_qvfsubs>;
+ def QVFSUBSs : AForm_2<0, 20,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfsubs $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fsub v4f32:$FRA, v4f32:$FRB))]>;
+
+ // Estimate Instructions
+ def QVFRE : AForm_4a<4, 24, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfre $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (PPCfre v4f64:$FRB))]>;
+ def QVFRES : QPXA4_Int<0, 24, "qvfres", int_ppc_qpx_qvfres>;
+ let isCodeGenOnly = 1 in
+ def QVFRESs : AForm_4a<0, 24, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfres $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (PPCfre v4f32:$FRB))]>;
+
+ def QVFRSQRTE : AForm_4a<4, 26, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrsqrte $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (PPCfrsqrte v4f64:$FRB))]>;
+ def QVFRSQRTES : QPXA4_Int<0, 26, "qvfrsqrtes", int_ppc_qpx_qvfrsqrtes>;
+ let isCodeGenOnly = 1 in
+ def QVFRSQRTESs : AForm_4a<0, 26, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrsqrtes $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (PPCfrsqrte v4f32:$FRB))]>;
+
+ // Multiply Instructions
+ let isCommutable = 1 in {
+ def QVFMUL : AForm_3<4, 25,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC),
+ "qvfmul $FRT, $FRA, $FRC", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fmul v4f64:$FRA, v4f64:$FRC))]>;
+ let isCodeGenOnly = 1 in
+ def QVFMULS : QPXA3_Int<0, 25, "qvfmuls", int_ppc_qpx_qvfmuls>;
+ def QVFMULSs : AForm_3<0, 25,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC),
+ "qvfmuls $FRT, $FRA, $FRC", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fmul v4f32:$FRA, v4f32:$FRC))]>;
+ }
+ def QVFXMUL : QPXA3_Int<4, 17, "qvfxmul", int_ppc_qpx_qvfxmul>;
+ def QVFXMULS : QPXA3_Int<0, 17, "qvfxmuls", int_ppc_qpx_qvfxmuls>;
+
+ // Multiply-add instructions
+ def QVFMADD : AForm_1<4, 29,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFMADDS : QPXA1_Int<0, 29, "qvfmadds", int_ppc_qpx_qvfmadds>;
+ def QVFMADDSs : AForm_1<0, 29,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC, v4f32:$FRB))]>;
+ def QVFNMADD : AForm_1<4, 31,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfnmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
+ v4f64:$FRB)))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNMADDS : QPXA1_Int<0, 31, "qvfnmadds", int_ppc_qpx_qvfnmadds>;
+ def QVFNMADDSs : AForm_1<0, 31,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfnmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
+ v4f32:$FRB)))]>;
+ def QVFMSUB : AForm_1<4, 28,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC,
+ (fneg v4f64:$FRB)))]>;
+ let isCodeGenOnly = 1 in
+ def QVFMSUBS : QPXA1_Int<0, 28, "qvfmsubs", int_ppc_qpx_qvfmsubs>;
+ def QVFMSUBSs : AForm_1<0, 28,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC,
+ (fneg v4f32:$FRB)))]>;
+ def QVFNMSUB : AForm_1<4, 30,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfnmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
+ (fneg v4f64:$FRB))))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNMSUBS : QPXA1_Int<0, 30, "qvfnmsubs", int_ppc_qpx_qvfnmsubs>;
+ def QVFNMSUBSs : AForm_1<0, 30,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfnmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
+ (fneg v4f32:$FRB))))]>;
+ def QVFXMADD : QPXA1_Int<4, 9, "qvfxmadd", int_ppc_qpx_qvfxmadd>;
+ def QVFXMADDS : QPXA1_Int<0, 9, "qvfxmadds", int_ppc_qpx_qvfxmadds>;
+ def QVFXXNPMADD : QPXA1_Int<4, 11, "qvfxxnpmadd", int_ppc_qpx_qvfxxnpmadd>;
+ def QVFXXNPMADDS : QPXA1_Int<0, 11, "qvfxxnpmadds", int_ppc_qpx_qvfxxnpmadds>;
+ def QVFXXCPNMADD : QPXA1_Int<4, 3, "qvfxxcpnmadd", int_ppc_qpx_qvfxxcpnmadd>;
+ def QVFXXCPNMADDS : QPXA1_Int<0, 3, "qvfxxcpnmadds", int_ppc_qpx_qvfxxcpnmadds>;
+ def QVFXXMADD : QPXA1_Int<4, 1, "qvfxxmadd", int_ppc_qpx_qvfxxmadd>;
+ def QVFXXMADDS : QPXA1_Int<0, 1, "qvfxxmadds", int_ppc_qpx_qvfxxmadds>;
+
+ // Select Instruction
+ let isCodeGenOnly = 1 in
+ def QVFSEL : QPXA1s_Int<4, 23, "qvfsel", int_ppc_qpx_qvfsel>;
+ def QVFSELb : AForm_1<4, 23, (outs qfrc:$FRT),
+ (ins qbrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (vselect v4i1:$FRA,
+ v4f64:$FRC, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFSELbs : AForm_1<4, 23, (outs qsrc:$FRT),
+ (ins qbrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (vselect v4i1:$FRA,
+ v4f32:$FRC, v4f32:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFSELbb: AForm_1<4, 23, (outs qbrc:$FRT),
+ (ins qbrc:$FRA, qbrc:$FRB, qbrc:$FRC),
+ "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+ [(set v4i1:$FRT, (vselect v4i1:$FRA,
+ v4i1:$FRC, v4i1:$FRB))]>;
+
+ // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+ // instruction selection into a branch sequence.
+ let usesCustomInserter = 1 in {
+ def SELECT_CC_QFRC: Pseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QFRC",
+ []>;
+ def SELECT_CC_QSRC: Pseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QSRC",
+ []>;
+ def SELECT_CC_QBRC: Pseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QBRC",
+ []>;
+
+ // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
+ // register bit directly.
+ def SELECT_QFRC: Pseudo<(outs qfrc:$dst), (ins crbitrc:$cond,
+ qfrc:$T, qfrc:$F), "#SELECT_QFRC",
+ [(set v4f64:$dst,
+ (select i1:$cond, v4f64:$T, v4f64:$F))]>;
+ def SELECT_QSRC: Pseudo<(outs qsrc:$dst), (ins crbitrc:$cond,
+ qsrc:$T, qsrc:$F), "#SELECT_QSRC",
+ [(set v4f32:$dst,
+ (select i1:$cond, v4f32:$T, v4f32:$F))]>;
+ def SELECT_QBRC: Pseudo<(outs qbrc:$dst), (ins crbitrc:$cond,
+ qbrc:$T, qbrc:$F), "#SELECT_QBRC",
+ [(set v4i1:$dst,
+ (select i1:$cond, v4i1:$T, v4i1:$F))]>;
+ }
+
+ // Convert and Round Instructions
+ def QVFCTID : QPXX19_Int<4, 814, "qvfctid", int_ppc_qpx_qvfctid>;
+ let isCodeGenOnly = 1 in
+ def QVFCTIDb : XForm_19<4, 814, (outs qbrc:$FRT), (ins qbrc:$FRB),
+ "qvfctid $FRT, $FRB", IIC_FPGeneral, []>;
+
+ def QVFCTIDU : QPXX19_Int<4, 942, "qvfctidu", int_ppc_qpx_qvfctidu>;
+ def QVFCTIDZ : QPXX19_Int<4, 815, "qvfctidz", int_ppc_qpx_qvfctidz>;
+ def QVFCTIDUZ : QPXX19_Int<4, 943, "qvfctiduz", int_ppc_qpx_qvfctiduz>;
+ def QVFCTIW : QPXX19_Int<4, 14, "qvfctiw", int_ppc_qpx_qvfctiw>;
+ def QVFCTIWU : QPXX19_Int<4, 142, "qvfctiwu", int_ppc_qpx_qvfctiwu>;
+ def QVFCTIWZ : QPXX19_Int<4, 15, "qvfctiwz", int_ppc_qpx_qvfctiwz>;
+ def QVFCTIWUZ : QPXX19_Int<4, 143, "qvfctiwuz", int_ppc_qpx_qvfctiwuz>;
+ def QVFCFID : QPXX19_Int<4, 846, "qvfcfid", int_ppc_qpx_qvfcfid>;
+ let isCodeGenOnly = 1 in
+ def QVFCFIDb : XForm_19<4, 846, (outs qbrc:$FRT), (ins qbrc:$FRB),
+ "qvfcfid $FRT, $FRB", IIC_FPGeneral, []>;
+
+ def QVFCFIDU : QPXX19_Int<4, 974, "qvfcfidu", int_ppc_qpx_qvfcfidu>;
+ def QVFCFIDS : QPXX19_Int<0, 846, "qvfcfids", int_ppc_qpx_qvfcfids>;
+ def QVFCFIDUS : QPXX19_Int<0, 974, "qvfcfidus", int_ppc_qpx_qvfcfidus>;
+
+ let isCodeGenOnly = 1 in
+ def QVFRSP : QPXX19_Int<4, 12, "qvfrsp", int_ppc_qpx_qvfrsp>;
+ def QVFRSPs : XForm_19<4, 12,
+ (outs qsrc:$FRT), (ins qfrc:$FRB),
+ "qvfrsp $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fround_inexact v4f64:$FRB))]>;
+
+ def QVFRIZ : XForm_19<4, 424, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfriz $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (ftrunc v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRIZs : XForm_19<4, 424, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfriz $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (ftrunc v4f32:$FRB))]>;
+
+ def QVFRIN : XForm_19<4, 392, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrin $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (frnd v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRINs : XForm_19<4, 392, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrin $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (frnd v4f32:$FRB))]>;
+
+ def QVFRIP : XForm_19<4, 456, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrip $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fceil v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRIPs : XForm_19<4, 456, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrip $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fceil v4f32:$FRB))]>;
+
+ def QVFRIM : XForm_19<4, 488, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrim $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (ffloor v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRIMs : XForm_19<4, 488, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrim $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (ffloor v4f32:$FRB))]>;
+
+ // Move Instructions
+ def QVFMR : XForm_19<4, 72,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfmr $FRT, $FRB", IIC_VecPerm,
+ [/* (set v4f64:$FRT, v4f64:$FRB) */]>;
+ let isCodeGenOnly = 1 in {
+ def QVFMRs : XForm_19<4, 72,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfmr $FRT, $FRB", IIC_VecPerm,
+ [/* (set v4f32:$FRT, v4f32:$FRB) */]>;
+ def QVFMRb : XForm_19<4, 72,
+ (outs qbrc:$FRT), (ins qbrc:$FRB),
+ "qvfmr $FRT, $FRB", IIC_VecPerm,
+ [/* (set v4i1:$FRT, v4i1:$FRB) */]>;
+ }
+ def QVFNEG : XForm_19<4, 40,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfneg $FRT, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fneg v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNEGs : XForm_19<4, 40,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfneg $FRT, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fneg v4f32:$FRB))]>;
+ def QVFABS : XForm_19<4, 264,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fabs v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFABSs : XForm_19<4, 264,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fabs v4f32:$FRB))]>;
+ def QVFNABS : XForm_19<4, 136,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfnabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fneg (fabs v4f64:$FRB)))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNABSs : XForm_19<4, 136,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfnabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fneg (fabs v4f32:$FRB)))]>;
+ def QVFCPSGN : XForm_18<4, 8,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fcopysign v4f64:$FRB, v4f64:$FRA))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCPSGNs : XForm_18<4, 8,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fcopysign v4f32:$FRB, v4f32:$FRA))]>;
+
+ def QVALIGNI : Z23Form_1<4, 5,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u2imm:$idx),
+ "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+ [(set v4f64:$FRT,
+ (PPCqvaligni v4f64:$FRA, v4f64:$FRB,
+ (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVALIGNIs : Z23Form_1<4, 5,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, u2imm:$idx),
+ "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+ [(set v4f32:$FRT,
+ (PPCqvaligni v4f32:$FRA, v4f32:$FRB,
+ (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVALIGNIb : Z23Form_1<4, 5,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u2imm:$idx),
+ "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+ [(set v4i1:$FRT,
+ (PPCqvaligni v4i1:$FRA, v4i1:$FRB,
+ (i32 imm:$idx)))]>;
+
+ def QVESPLATI : Z23Form_2<4, 37,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, u2imm:$idx),
+ "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+ [(set v4f64:$FRT,
+ (PPCqvesplati v4f64:$FRA, (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVESPLATIs : Z23Form_2<4, 37,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, u2imm:$idx),
+ "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+ [(set v4f32:$FRT,
+ (PPCqvesplati v4f32:$FRA, (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVESPLATIb : Z23Form_2<4, 37,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, u2imm:$idx),
+ "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+ [(set v4i1:$FRT,
+ (PPCqvesplati v4i1:$FRA, (i32 imm:$idx)))]>;
+
+ def QVFPERM : AForm_1<4, 6,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm,
+ [(set v4f64:$FRT,
+ (PPCqvfperm v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+ let isCodeGenOnly = 1 in
+ def QVFPERMs : AForm_1<4, 6,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qfrc:$FRC),
+ "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm,
+ [(set v4f32:$FRT,
+ (PPCqvfperm v4f32:$FRA, v4f32:$FRB, v4f64:$FRC))]>;
+
+ let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+ def QVGPCI : Z23Form_3<4, 133,
+ (outs qfrc:$FRT), (ins u12imm:$idx),
+ "qvgpci $FRT, $idx", IIC_VecPerm,
+ [(set v4f64:$FRT, (PPCqvgpci (u12:$idx)))]>;
+
+ // Compare Instruction
+ let isCodeGenOnly = 1 in
+ def QVFTSTNAN : QPXX18_Int<4, 64, "qvftstnan", int_ppc_qpx_qvftstnan>;
+ def QVFTSTNANb : XForm_18<4, 64, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETUO))]>;
+ let isCodeGenOnly = 1 in
+ def QVFTSTNANbs : XForm_18<4, 64, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETUO))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPLT : QPXX18_Int<4, 96, "qvfcmplt", int_ppc_qpx_qvfcmplt>;
+ def QVFCMPLTb : XForm_18<4, 96, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETOLT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPLTbs : XForm_18<4, 96, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETOLT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPGT : QPXX18_Int<4, 32, "qvfcmpgt", int_ppc_qpx_qvfcmpgt>;
+ def QVFCMPGTb : XForm_18<4, 32, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETOGT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPGTbs : XForm_18<4, 32, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETOGT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPEQ : QPXX18_Int<4, 0, "qvfcmpeq", int_ppc_qpx_qvfcmpeq>;
+ def QVFCMPEQb : XForm_18<4, 0, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETOEQ))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPEQbs : XForm_18<4, 0, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETOEQ))]>;
+
+ let isCodeGenOnly = 1 in
+ def QVFLOGICAL : XForm_20<4, 4,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u12imm:$tttt),
+ "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+ def QVFLOGICALb : XForm_20<4, 4,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt),
+ "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+ let isCodeGenOnly = 1 in
+ def QVFLOGICALs : XForm_20<4, 4,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt),
+ "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+
+ // Load indexed instructions
+ let mayLoad = 1 in {
+ def QVLFDX : XForm_1<31, 583,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfdx $FRT, $src", IIC_LdStLFD,
+ [(set v4f64:$FRT, (load xoaddr:$src))]>;
+ let isCodeGenOnly = 1 in
+ def QVLFDXb : XForm_1<31, 583,
+ (outs qbrc:$FRT), (ins memrr:$src),
+ "qvlfdx $FRT, $src", IIC_LdStLFD, []>;
+
+ let RC = 1 in
+ def QVLFDXA : XForm_1<31, 583,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfdxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFDUX : XForm_1<31, 615,
+ (outs qfrc:$FRT, ptr_rc_nor0:$ea_result),
+ (ins memrr:$src),
+ "qvlfdux $FRT, $src", IIC_LdStLFDU, []>,
+ RegConstraint<"$src.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+ let RC = 1 in
+ def QVLFDUXA : XForm_1<31, 615,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfduxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFSX : XForm_1<31, 519,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfsx $FRT, $src", IIC_LdStLFD,
+ [(set v4f64:$FRT, (extloadv4f32 xoaddr:$src))]>;
+
+ let isCodeGenOnly = 1 in
+ def QVLFSXb : XForm_1<31, 519,
+ (outs qbrc:$FRT), (ins memrr:$src),
+ "qvlfsx $FRT, $src", IIC_LdStLFD,
+ [(set v4i1:$FRT, (PPCqvlfsb xoaddr:$src))]>;
+ let isCodeGenOnly = 1 in
+ def QVLFSXs : XForm_1<31, 519,
+ (outs qsrc:$FRT), (ins memrr:$src),
+ "qvlfsx $FRT, $src", IIC_LdStLFD,
+ [(set v4f32:$FRT, (load xoaddr:$src))]>;
+
+ let RC = 1 in
+ def QVLFSXA : XForm_1<31, 519,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfsxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFSUX : XForm_1<31, 551,
+ (outs qsrc:$FRT, ptr_rc_nor0:$ea_result),
+ (ins memrr:$src),
+ "qvlfsux $FRT, $src", IIC_LdStLFDU, []>,
+ RegConstraint<"$src.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+ let RC = 1 in
+ def QVLFSUXA : XForm_1<31, 551,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfsuxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCDX : XForm_1<31, 71,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcdx $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFCDXA : XForm_1<31, 71,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcdxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCDUX : XForm_1<31, 103,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcdux $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFCDUXA : XForm_1<31, 103,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcduxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCSX : XForm_1<31, 7,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsx $FRT, $src", IIC_LdStLFD, []>;
+ let isCodeGenOnly = 1 in
+ def QVLFCSXs : XForm_1<31, 7,
+ (outs qsrc:$FRT), (ins memrr:$src),
+ "qvlfcsx $FRT, $src", IIC_LdStLFD, []>;
+
+ let RC = 1 in
+ def QVLFCSXA : XForm_1<31, 7,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCSUX : XForm_1<31, 39,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsux $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFCSUXA : XForm_1<31, 39,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsuxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFIWAX : XForm_1<31, 871,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwax $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFIWAXA : XForm_1<31, 871,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwaxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFIWZX : XForm_1<31, 839,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwzx $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFIWZXA : XForm_1<31, 839,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwzxa $FRT, $src", IIC_LdStLFD, []>;
+ }
+
+
+ def QVLPCLDX : XForm_1<31, 582,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpcldx $FRT, $src", IIC_LdStLFD, []>;
+ def QVLPCLSX : XForm_1<31, 518,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpclsx $FRT, $src", IIC_LdStLFD, []>;
+ let isCodeGenOnly = 1 in
+ def QVLPCLSXint : XForm_11<31, 518,
+ (outs qfrc:$FRT), (ins G8RC:$src),
+ "qvlpclsx $FRT, 0, $src", IIC_LdStLFD, []>;
+ def QVLPCRDX : XForm_1<31, 70,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpcrdx $FRT, $src", IIC_LdStLFD, []>;
+ def QVLPCRSX : XForm_1<31, 6,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpcrsx $FRT, $src", IIC_LdStLFD, []>;
+
+ // Store indexed instructions
+ let mayStore = 1 in {
+ def QVSTFDX : XForm_8<31, 711,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdx $FRT, $dst", IIC_LdStSTFD,
+ [(store qfrc:$FRT, xoaddr:$dst)]>;
+ let isCodeGenOnly = 1 in
+ def QVSTFDXb : XForm_8<31, 711,
+ (outs), (ins qbrc:$FRT, memrr:$dst),
+ "qvstfdx $FRT, $dst", IIC_LdStSTFD, []>;
+
+ let RC = 1 in
+ def QVSTFDXA : XForm_8<31, 711,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFDUX : XForm_8<31, 743, (outs ptr_rc_nor0:$ea_res),
+ (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdux $FRT, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">,
+ NoEncode<"$ea_res">;
+
+ let RC = 1 in
+ def QVSTFDUXA : XForm_8<31, 743,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfduxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFDXI : XForm_8<31, 709,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFDXIA : XForm_8<31, 709,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFDUXI : XForm_8<31, 741,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfduxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFDUXIA : XForm_8<31, 741,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfduxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSX : XForm_8<31, 647,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsx $FRT, $dst", IIC_LdStSTFD,
+ [(truncstorev4f32 qfrc:$FRT, xoaddr:$dst)]>;
+ let isCodeGenOnly = 1 in
+ def QVSTFSXs : XForm_8<31, 647,
+ (outs), (ins qsrc:$FRT, memrr:$dst),
+ "qvstfsx $FRT, $dst", IIC_LdStSTFD,
+ [(store qsrc:$FRT, xoaddr:$dst)]>;
+
+ let RC = 1 in
+ def QVSTFSXA : XForm_8<31, 647,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSUX : XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res),
+ (ins qsrc:$FRT, memrr:$dst),
+ "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">,
+ NoEncode<"$ea_res">;
+ let isCodeGenOnly = 1 in
+ def QVSTFSUXs: XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res),
+ (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">,
+ NoEncode<"$ea_res">;
+
+ let RC = 1 in
+ def QVSTFSUXA : XForm_8<31, 679,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsuxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSXI : XForm_8<31, 645,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFSXIA : XForm_8<31, 645,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSUXI : XForm_8<31, 677,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsuxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFSUXIA : XForm_8<31, 677,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsuxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDX : XForm_8<31, 199,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdx $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDXA : XForm_8<31, 199,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSX : XForm_8<31, 135,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>;
+ let isCodeGenOnly = 1 in
+ def QVSTFCSXs : XForm_8<31, 135,
+ (outs), (ins qsrc:$FRT, memrr:$dst),
+ "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>;
+
+ let RC = 1 in
+ def QVSTFCSXA : XForm_8<31, 135,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDUX : XForm_8<31, 231,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdux $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDUXA : XForm_8<31, 231,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcduxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSUX : XForm_8<31, 167,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsux $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCSUXA : XForm_8<31, 167,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsuxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDXI : XForm_8<31, 197,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDXIA : XForm_8<31, 197,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSXI : XForm_8<31, 133,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCSXIA : XForm_8<31, 133,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDUXI : XForm_8<31, 229,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcduxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDUXIA : XForm_8<31, 229,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcduxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSUXI : XForm_8<31, 165,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsuxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCSUXIA : XForm_8<31, 165,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsuxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFIWX : XForm_8<31, 967,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfiwx $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFIWXA : XForm_8<31, 967,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfiwxa $FRT, $dst", IIC_LdStSTFD, []>;
+ }
+}
+
+} // neverHasSideEffects
+}
+
+def : InstAlias<"qvfclr $FRT",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 0)>;
+def : InstAlias<"qvfand $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 1)>;
+def : InstAlias<"qvfandc $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 4)>;
+def : InstAlias<"qvfctfb $FRT, $FRA",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 5)>;
+def : InstAlias<"qvfxor $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 6)>;
+def : InstAlias<"qvfor $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 7)>;
+def : InstAlias<"qvfnor $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 8)>;
+def : InstAlias<"qvfequ $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 9)>;
+def : InstAlias<"qvfnot $FRT, $FRA",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 10)>;
+def : InstAlias<"qvforc $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 13)>;
+def : InstAlias<"qvfnand $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 14)>;
+def : InstAlias<"qvfset $FRT",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 15)>;
+
+//===----------------------------------------------------------------------===//
+// Additional QPX Patterns
+//
+
+def : Pat<(v4f64 (scalar_to_vector f64:$A)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), $A, sub_64)>;
+def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $A, sub_64)>;
+
+def : Pat<(f64 (vector_extract v4f64:$S, 0)),
+ (EXTRACT_SUBREG $S, sub_64)>;
+def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+ (EXTRACT_SUBREG $S, sub_64)>;
+
+def : Pat<(f64 (vector_extract v4f64:$S, 1)),
+ (EXTRACT_SUBREG (QVESPLATI $S, 1), sub_64)>;
+def : Pat<(f64 (vector_extract v4f64:$S, 2)),
+ (EXTRACT_SUBREG (QVESPLATI $S, 2), sub_64)>;
+def : Pat<(f64 (vector_extract v4f64:$S, 3)),
+ (EXTRACT_SUBREG (QVESPLATI $S, 3), sub_64)>;
+
+def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+ (EXTRACT_SUBREG (QVESPLATIs $S, 1), sub_64)>;
+def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+ (EXTRACT_SUBREG (QVESPLATIs $S, 2), sub_64)>;
+def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+ (EXTRACT_SUBREG (QVESPLATIs $S, 3), sub_64)>;
+
+def : Pat<(f64 (vector_extract v4f64:$S, i64:$F)),
+ (EXTRACT_SUBREG (QVFPERM $S, $S,
+ (QVLPCLSXint (RLDICR $F, 2,
+ /* 63-2 = */ 61))),
+ sub_64)>;
+def : Pat<(f32 (vector_extract v4f32:$S, i64:$F)),
+ (EXTRACT_SUBREG (QVFPERMs $S, $S,
+ (QVLPCLSXint (RLDICR $F, 2,
+ /* 63-2 = */ 61))),
+ sub_64)>;
+
+def : Pat<(int_ppc_qpx_qvfperm v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFPERM $A, $B, $C)>;
+
+def : Pat<(int_ppc_qpx_qvfcpsgn v4f64:$A, v4f64:$B),
+ (QVFCPSGN $A, $B)>;
+
+// FCOPYSIGN's operand types need not agree.
+def : Pat<(fcopysign v4f64:$frB, v4f32:$frA),
+ (QVFCPSGN (COPY_TO_REGCLASS $frA, QFRC), $frB)>;
+def : Pat<(fcopysign QSRC:$frB, QFRC:$frA),
+ (QVFCPSGNs (COPY_TO_REGCLASS $frA, QSRC), $frB)>;
+
+def : Pat<(int_ppc_qpx_qvfneg v4f64:$A), (QVFNEG $A)>;
+def : Pat<(int_ppc_qpx_qvfabs v4f64:$A), (QVFABS $A)>;
+def : Pat<(int_ppc_qpx_qvfnabs v4f64:$A), (QVFNABS $A)>;
+
+def : Pat<(int_ppc_qpx_qvfriz v4f64:$A), (QVFRIZ $A)>;
+def : Pat<(int_ppc_qpx_qvfrin v4f64:$A), (QVFRIN $A)>;
+def : Pat<(int_ppc_qpx_qvfrip v4f64:$A), (QVFRIP $A)>;
+def : Pat<(int_ppc_qpx_qvfrim v4f64:$A), (QVFRIM $A)>;
+
+def : Pat<(int_ppc_qpx_qvfre v4f64:$A), (QVFRE $A)>;
+def : Pat<(int_ppc_qpx_qvfrsqrte v4f64:$A), (QVFRSQRTE $A)>;
+
+def : Pat<(int_ppc_qpx_qvfadd v4f64:$A, v4f64:$B),
+ (QVFADD $A, $B)>;
+def : Pat<(int_ppc_qpx_qvfsub v4f64:$A, v4f64:$B),
+ (QVFSUB $A, $B)>;
+def : Pat<(int_ppc_qpx_qvfmul v4f64:$A, v4f64:$B),
+ (QVFMUL $A, $B)>;
+
+// Additional QVFNMSUB patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg v4f64:$A), v4f64:$C, v4f64:$B),
+ (QVFNMSUB $A, $B, $C)>;
+def : Pat<(fma v4f64:$A, (fneg v4f64:$C), v4f64:$B),
+ (QVFNMSUB $A, $B, $C)>;
+def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
+ (QVFNMSUBSs $A, $B, $C)>;
+def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
+ (QVFNMSUBSs $A, $B, $C)>;
+
+def : Pat<(int_ppc_qpx_qvfmadd v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFMADD $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfnmadd v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFNMADD $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfmsub v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFMSUB $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfnmsub v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFNMSUB $A, $B, $C)>;
+
+def : Pat<(int_ppc_qpx_qvlfd xoaddr:$src),
+ (QVLFDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src),
+ (QVLFDXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfs xoaddr:$src),
+ (QVLFSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src),
+ (QVLFSXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcda xoaddr:$src),
+ (QVLFCDXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcd xoaddr:$src),
+ (QVLFCDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcsa xoaddr:$src),
+ (QVLFCSXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcs xoaddr:$src),
+ (QVLFCSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src),
+ (QVLFDXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwaa xoaddr:$src),
+ (QVLFIWAXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwa xoaddr:$src),
+ (QVLFIWAX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwza xoaddr:$src),
+ (QVLFIWZXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwz xoaddr:$src),
+ (QVLFIWZX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src),
+ (QVLFSXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcld xoaddr:$src),
+ (QVLPCLDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcls xoaddr:$src),
+ (QVLPCLSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcrd xoaddr:$src),
+ (QVLPCRDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcrs xoaddr:$src),
+ (QVLPCRSX xoaddr:$src)>;
+
+def : Pat<(int_ppc_qpx_qvstfd v4f64:$T, xoaddr:$dst),
+ (QVSTFDX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfs v4f64:$T, xoaddr:$dst),
+ (QVSTFSX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcda v4f64:$T, xoaddr:$dst),
+ (QVSTFCDXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcd v4f64:$T, xoaddr:$dst),
+ (QVSTFCDX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcsa v4f64:$T, xoaddr:$dst),
+ (QVSTFCSXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcs v4f64:$T, xoaddr:$dst),
+ (QVSTFCSX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfda v4f64:$T, xoaddr:$dst),
+ (QVSTFDXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfiwa v4f64:$T, xoaddr:$dst),
+ (QVSTFIWXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfiw v4f64:$T, xoaddr:$dst),
+ (QVSTFIWX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfsa v4f64:$T, xoaddr:$dst),
+ (QVSTFSXA $T, xoaddr:$dst)>;
+
+def : Pat<(pre_store v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (QVSTFDUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store v4f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (QVSTFSUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncstv4f32 v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (QVSTFSUXs $rS, $ptrreg, $ptroff)>;
+
+def : Pat<(int_ppc_qpx_qvflogical v4f64:$A, v4f64:$B, (i32 imm:$idx)),
+ (QVFLOGICAL $A, $B, imm:$idx)>;
+def : Pat<(int_ppc_qpx_qvgpci (u12:$idx)),
+ (QVGPCI imm:$idx)>;
+
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOGE),
+ (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOLE),
+ (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETONE),
+ (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETO),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUEQ),
+ (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGT),
+ (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGE),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFCMPLTb $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULT),
+ (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULE),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFCMPGTb $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUNE),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFCMPEQb $FRA, $FRB), (i32 13))>;
+
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETEQ),
+ (QVFCMPEQb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGT),
+ (QVFCMPGTb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGE),
+ (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFCMPLTb $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLT),
+ (QVFCMPLTb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLE),
+ (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFCMPGTb $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETNE),
+ (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+ (QVFCMPEQb $FRA, $FRB), (i32 10))>;
+
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOGE),
+ (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOLE),
+ (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETONE),
+ (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETO),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUEQ),
+ (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGT),
+ (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGE),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFCMPLTbs $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULT),
+ (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULE),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFCMPGTbs $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUNE),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFCMPEQbs $FRA, $FRB), (i32 13))>;
+
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETEQ),
+ (QVFCMPEQbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGT),
+ (QVFCMPGTbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGE),
+ (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFCMPLTbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLT),
+ (QVFCMPLTbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLE),
+ (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFCMPGTbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETNE),
+ (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+ (QVFCMPEQbs $FRA, $FRB), (i32 10))>;
+
+def : Pat<(and v4i1:$FRA, (not v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 4))>;
+def : Pat<(not (or v4i1:$FRA, v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 8))>;
+def : Pat<(not (xor v4i1:$FRA, v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 9))>;
+def : Pat<(or v4i1:$FRA, (not v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 13))>;
+def : Pat<(not (and v4i1:$FRA, v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 14))>;
+
+def : Pat<(and v4i1:$FRA, v4i1:$FRB),
+ (QVFLOGICALb $FRA, $FRB, (i32 1))>;
+def : Pat<(or v4i1:$FRA, v4i1:$FRB),
+ (QVFLOGICALb $FRA, $FRB, (i32 7))>;
+def : Pat<(xor v4i1:$FRA, v4i1:$FRB),
+ (QVFLOGICALb $FRA, $FRB, (i32 6))>;
+def : Pat<(not v4i1:$FRA),
+ (QVFLOGICALb $FRA, $FRA, (i32 10))>;
+
+def : Pat<(v4f64 (fextend v4f32:$src)),
+ (COPY_TO_REGCLASS $src, QFRC)>;
+
+def : Pat<(v4f32 (fround_exact v4f64:$src)),
+ (COPY_TO_REGCLASS $src, QSRC)>;
+
+// Extract the underlying floating-point values from the
+// QPX (-1.0, 1.0) boolean representation.
+def : Pat<(v4f64 (PPCqbflt v4i1:$src)),
+ (COPY_TO_REGCLASS $src, QFRC)>;
+
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLT)),
+ (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLE)),
+ (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETEQ)),
+ (SELECT_QFRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGE)),
+ (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGT)),
+ (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETNE)),
+ (SELECT_QFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLT)),
+ (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLE)),
+ (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETEQ)),
+ (SELECT_QSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGE)),
+ (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGT)),
+ (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETNE)),
+ (SELECT_QSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLT)),
+ (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLE)),
+ (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETEQ)),
+ (SELECT_QBRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGE)),
+ (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGT)),
+ (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETNE)),
+ (SELECT_QBRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+} // end HasQPX
+
+let Predicates = [HasQPX, NoNaNsFPMath] in {
+def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFCMPLTb $FRA, $FRB), $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFCMPGTb $FRA, $FRB), $FRB, $FRA)>;
+
+def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFCMPLTbs $FRA, $FRB), $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFCMPGTbs $FRA, $FRB), $FRB, $FRA)>;
+}
+
+let Predicates = [HasQPX, NaNsFPMath] in {
+// When either of these operands is NaN, we should return the other operand.
+// QVFCMPLT/QVFCMPGT return false is either operand is NaN, which means we need
+// to explicitly or with a NaN test on the second operand.
+def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFTSTNANb $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFTSTNANb $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+
+def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index b21b251443eb..9685bac2aebb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -11,6 +11,21 @@
//
//===----------------------------------------------------------------------===//
+// *********************************** NOTE ***********************************
+// ** For POWER8 Little Endian, the VSX swap optimization relies on knowing **
+// ** which VMX and VSX instructions are lane-sensitive and which are not. **
+// ** A lane-sensitive instruction relies, implicitly or explicitly, on **
+// ** whether lanes are numbered from left to right. An instruction like **
+// ** VADDFP is not lane-sensitive, because each lane of the result vector **
+// ** relies only on the corresponding lane of the source vectors. However, **
+// ** an instruction like VMULESB is lane-sensitive, because "even" and **
+// ** "odd" lanes are different for big-endian and little-endian numbering. **
+// ** **
+// ** When adding new VMX and VSX instructions, please consider whether they **
+// ** are lane-sensitive. If so, they must be added to a switch statement **
+// ** in PPCVSXSwapRemoval::gatherVectorInstructions(). **
+// ****************************************************************************
+
def PPCRegVSRCAsmOperand : AsmOperandClass {
let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber";
}
@@ -25,6 +40,13 @@ def vsfrc : RegisterOperand<VSFRC> {
let ParserMatchClass = PPCRegVSFRCAsmOperand;
}
+def PPCRegVSSRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSSRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vssrc : RegisterOperand<VSSRC> {
+ let ParserMatchClass = PPCRegVSSRCAsmOperand;
+}
+
// Little-endian-specific nodes.
def SDT_PPClxvd2x : SDTypeProfile<1, 1, [
SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
@@ -41,6 +63,9 @@ def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
[SDNPHasChain, SDNPMayStore]>;
def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
+def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
+def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
+def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
@@ -66,7 +91,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
let Uses = [RM] in {
// Load indexed instructions
- let mayLoad = 1, canFoldAsLoad = 1 in {
+ let mayLoad = 1 in {
def LXSDX : XX1Form<31, 588,
(outs vsfrc:$XT), (ins memrr:$src),
"lxsdx $XT, $src", IIC_LdStLFD,
@@ -85,7 +110,7 @@ let Uses = [RM] in {
(outs vsrc:$XT), (ins memrr:$src),
"lxvw4x $XT, $src", IIC_LdStLFD,
[(set v4i32:$XT, (int_ppc_vsx_lxvw4x xoaddr:$src))]>;
- }
+ } // mayLoad
// Store indexed instructions
let mayStore = 1 in {
@@ -97,13 +122,14 @@ let Uses = [RM] in {
def STXVD2X : XX1Form<31, 972,
(outs), (ins vsrc:$XT, memrr:$dst),
"stxvd2x $XT, $dst", IIC_LdStSTFD,
- [(int_ppc_vsx_stxvd2x v2f64:$XT, xoaddr:$dst)]>;
+ [(store v2f64:$XT, xoaddr:$dst)]>;
def STXVW4X : XX1Form<31, 908,
(outs), (ins vsrc:$XT, memrr:$dst),
"stxvw4x $XT, $dst", IIC_LdStSTFD,
- [(int_ppc_vsx_stxvw4x v4i32:$XT, xoaddr:$dst)]>;
- }
+ [(store v4i32:$XT, xoaddr:$dst)]>;
+
+ } // mayStore
// Add/Mul Instructions
let isCommutable = 1 in {
@@ -773,6 +799,15 @@ let usesCustomInserter = 1, // Expanded after instruction selection.
"#SELECT_VSFRC",
[(set f64:$dst,
(select i1:$cond, f64:$T, f64:$F))]>;
+ def SELECT_CC_VSSRC: Pseudo<(outs f4rc:$dst),
+ (ins crrc:$cond, f4rc:$T, f4rc:$F,
+ i32imm:$BROPC), "#SELECT_CC_VSSRC",
+ []>;
+ def SELECT_VSSRC: Pseudo<(outs f4rc:$dst),
+ (ins crbitrc:$cond, f4rc:$T, f4rc:$F),
+ "#SELECT_VSSRC",
+ [(set f32:$dst,
+ (select i1:$cond, f32:$T, f32:$F))]>;
} // usesCustomInserter
} // AddedComplexity
@@ -872,6 +907,11 @@ def : Pat<(v2f64 (bitconvert v2i64:$A)),
def : Pat<(v2i64 (bitconvert v2f64:$A)),
(COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v2f64 (bitconvert v1i128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v1i128 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
// sign extension patterns
// To extend "in place" from v2i32 to v2i64, we have input data like:
// | undef | i32 | undef | i32 |
@@ -891,9 +931,11 @@ def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
// Stores.
-def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
+ (STXVD2X $rS, xoaddr:$dst)>;
def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
-def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
+ (STXVW4X $rS, xoaddr:$dst)>;
def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
// Permutes.
@@ -938,3 +980,124 @@ def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
} // AddedComplexity
} // HasVSX
+// The following VSX instructions were introduced in Power ISA 2.07
+/* FIXME: if the operands are v2i64, these patterns will not match.
+ we should define new patterns or otherwise match the same patterns
+ when the elements are larger than i32.
+*/
+def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">;
+def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
+let Predicates = [HasP8Vector] in {
+let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+ let isCommutable = 1 in {
+ def XXLEQV : XX3Form<60, 186,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxleqv $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (vnot_ppc (xor v4i32:$XA, v4i32:$XB)))]>;
+ def XXLNAND : XX3Form<60, 178,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlnand $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
+ v4i32:$XB)))]>;
+ } // isCommutable
+
+ def XXLORC : XX3Form<60, 170,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlorc $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
+
+ // VSX scalar loads introduced in ISA 2.07
+ let mayLoad = 1 in {
+ def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
+ "lxsspx $XT, $src", IIC_LdStLFD,
+ [(set f32:$XT, (load xoaddr:$src))]>;
+ def LXSIWAX : XX1Form<31, 76, (outs vsfrc:$XT), (ins memrr:$src),
+ "lxsiwax $XT, $src", IIC_LdStLFD,
+ [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
+ def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
+ "lxsiwzx $XT, $src", IIC_LdStLFD,
+ [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
+ } // mayLoad
+
+ // VSX scalar stores introduced in ISA 2.07
+ let mayStore = 1 in {
+ def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
+ "stxsspx $XT, $dst", IIC_LdStSTFD,
+ [(store f32:$XT, xoaddr:$dst)]>;
+ def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
+ "stxsiwx $XT, $dst", IIC_LdStSTFD,
+ [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
+ } // mayStore
+
+ def : Pat<(f64 (extloadf32 xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;
+ def : Pat<(f64 (fextend f32:$src)),
+ (COPY_TO_REGCLASS $src, VSFRC)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
+ (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
+ (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
+ (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
+ (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
+ (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
+ (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+ // VSX Elementary Scalar FP arithmetic (SP)
+ let isCommutable = 1 in {
+ def XSADDSP : XX3Form<60, 0,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xsaddsp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fadd f32:$XA, f32:$XB))]>;
+ def XSMULSP : XX3Form<60, 16,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xsmulsp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fmul f32:$XA, f32:$XB))]>;
+ } // isCommutable
+
+ def XSDIVSP : XX3Form<60, 24,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xsdivsp $XT, $XA, $XB", IIC_FPDivS,
+ [(set f32:$XT, (fdiv f32:$XA, f32:$XB))]>;
+ def XSRESP : XX2Form<60, 26,
+ (outs vssrc:$XT), (ins vssrc:$XB),
+ "xsresp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfre f32:$XB))]>;
+ def XSSQRTSP : XX2Form<60, 11,
+ (outs vssrc:$XT), (ins vssrc:$XB),
+ "xssqrtsp $XT, $XB", IIC_FPSqrtS,
+ [(set f32:$XT, (fsqrt f32:$XB))]>;
+ def XSRSQRTESP : XX2Form<60, 10,
+ (outs vssrc:$XT), (ins vssrc:$XB),
+ "xsrsqrtesp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfrsqrte f32:$XB))]>;
+ def XSSUBSP : XX3Form<60, 8,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xssubsp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
+} // AddedComplexity = 400
+} // HasP8Vector
+
+let Predicates = [HasDirectMove, HasVSX] in {
+ // VSX direct move instructions
+ def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
+ "mfvsrd $rA, $XT", IIC_VecGeneral,
+ [(set i64:$rA, (PPCmfvsr f64:$XT))]>,
+ Requires<[In64BitMode]>;
+ def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
+ "mfvsrwz $rA, $XT", IIC_VecGeneral,
+ [(set i32:$rA, (PPCmfvsr f64:$XT))]>;
+ def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA),
+ "mtvsrd $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsra i64:$rA))]>,
+ Requires<[In64BitMode]>;
+ def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA),
+ "mtvsrwa $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsra i32:$rA))]>;
+ def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA),
+ "mtvsrwz $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
+} // HasDirectMove, HasVSX
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
new file mode 100644
index 000000000000..b4e1c099f190
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
@@ -0,0 +1,232 @@
+//===-------- PPCLoopDataPrefetch.cpp - Loop Data Prefetching Pass --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a Loop Data Prefetching Pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-loop-data-prefetch"
+#include "PPC.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+// By default, we limit this to creating 16 PHIs (which is a little over half
+// of the allocatable register set).
+static cl::opt<bool>
+PrefetchWrites("ppc-loop-prefetch-writes", cl::Hidden, cl::init(false),
+ cl::desc("Prefetch write addresses"));
+
+// This seems like a reasonable default for the BG/Q (this pass is enabled, by
+// default, only on the BG/Q).
+static cl::opt<unsigned>
+PrefDist("ppc-loop-prefetch-distance", cl::Hidden, cl::init(300),
+ cl::desc("The loop prefetch distance"));
+
+static cl::opt<unsigned>
+CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
+ cl::desc("The loop prefetch cache line size"));
+
+namespace llvm {
+ void initializePPCLoopDataPrefetchPass(PassRegistry&);
+}
+
+namespace {
+
+ class PPCLoopDataPrefetch : public FunctionPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ PPCLoopDataPrefetch() : FunctionPass(ID) {
+ initializePPCLoopDataPrefetchPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolution>();
+ // FIXME: For some reason, preserving SE here breaks LSR (even if
+ // this pass changes nothing).
+ // AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override;
+ bool runOnLoop(Loop *L);
+
+ private:
+ AssumptionCache *AC;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ const TargetTransformInfo *TTI;
+ const DataLayout *DL;
+ };
+}
+
+char PPCLoopDataPrefetch::ID = 0;
+INITIALIZE_PASS_BEGIN(PPCLoopDataPrefetch, "ppc-loop-data-prefetch",
+ "PPC Loop Data Prefetch", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(PPCLoopDataPrefetch, "ppc-loop-data-prefetch",
+ "PPC Loop Data Prefetch", false, false)
+
+FunctionPass *llvm::createPPCLoopDataPrefetchPass() { return new PPCLoopDataPrefetch(); }
+
+bool PPCLoopDataPrefetch::runOnFunction(Function &F) {
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SE = &getAnalysis<ScalarEvolution>();
+ DL = &F.getParent()->getDataLayout();
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ bool MadeChange = false;
+
+ for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I)
+ for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L)
+ MadeChange |= runOnLoop(*L);
+
+ return MadeChange;
+}
+
+bool PPCLoopDataPrefetch::runOnLoop(Loop *L) {
+ bool MadeChange = false;
+
+ // Only prefetch in the inner-most loop
+ if (!L->empty())
+ return MadeChange;
+
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, AC, EphValues);
+
+ // Calculate the number of iterations ahead to prefetch
+ CodeMetrics Metrics;
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I) {
+
+ // If the loop already has prefetches, then assume that the user knows
+ // what he or she is doing and don't add any more.
+ for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
+ J != JE; ++J)
+ if (CallInst *CI = dyn_cast<CallInst>(J))
+ if (Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::prefetch)
+ return MadeChange;
+
+ Metrics.analyzeBasicBlock(*I, *TTI, EphValues);
+ }
+ unsigned LoopSize = Metrics.NumInsts;
+ if (!LoopSize)
+ LoopSize = 1;
+
+ unsigned ItersAhead = PrefDist/LoopSize;
+ if (!ItersAhead)
+ ItersAhead = 1;
+
+ SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 16> PrefLoads;
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I) {
+ for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
+ J != JE; ++J) {
+ Value *PtrValue;
+ Instruction *MemI;
+
+ if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
+ MemI = LMemI;
+ PtrValue = LMemI->getPointerOperand();
+ } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
+ if (!PrefetchWrites) continue;
+ MemI = SMemI;
+ PtrValue = SMemI->getPointerOperand();
+ } else continue;
+
+ unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
+ if (PtrAddrSpace)
+ continue;
+
+ if (L->isLoopInvariant(PtrValue))
+ continue;
+
+ const SCEV *LSCEV = SE->getSCEV(PtrValue);
+ const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
+ if (!LSCEVAddRec)
+ continue;
+
+ // We don't want to double prefetch individual cache lines. If this load
+ // is known to be within one cache line of some other load that has
+ // already been prefetched, then don't prefetch this one as well.
+ bool DupPref = false;
+ for (SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>,
+ 16>::iterator K = PrefLoads.begin(), KE = PrefLoads.end();
+ K != KE; ++K) {
+ const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, K->second);
+ if (const SCEVConstant *ConstPtrDiff =
+ dyn_cast<SCEVConstant>(PtrDiff)) {
+ int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
+ if (PD < (int64_t) CacheLineSize) {
+ DupPref = true;
+ break;
+ }
+ }
+ }
+ if (DupPref)
+ continue;
+
+ const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr(
+ SE->getConstant(LSCEVAddRec->getType(), ItersAhead),
+ LSCEVAddRec->getStepRecurrence(*SE)));
+ if (!isSafeToExpand(NextLSCEV, *SE))
+ continue;
+
+ PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec));
+
+ Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace);
+ SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr");
+ Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI);
+
+ IRBuilder<> Builder(MemI);
+ Module *M = (*I)->getParent()->getParent();
+ Type *I32 = Type::getInt32Ty((*I)->getContext());
+ Value *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch);
+ Builder.CreateCall(
+ PrefetchFunc,
+ {PrefPtrValue,
+ ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1),
+ ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
+
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
new file mode 100644
index 000000000000..b6e7799402e1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -0,0 +1,390 @@
+//===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass to prepare loops for pre-increment addressing
+// modes. Additional PHIs are created for loop induction variables used by
+// load/store instructions so that the pre-increment forms can be used.
+// Generically, this means transforming loops like this:
+// for (int i = 0; i < n; ++i)
+// array[i] = c;
+// to look like this:
+// T *p = array[-1];
+// for (int i = 0; i < n; ++i)
+// *++p = c;
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-loop-preinc-prep"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+// By default, we limit this to creating 16 PHIs (which is a little over half
+// of the allocatable register set).
+static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars",
+ cl::Hidden, cl::init(16),
+ cl::desc("Potential PHI threshold for PPC preinc loop prep"));
+
+namespace llvm {
+ void initializePPCLoopPreIncPrepPass(PassRegistry&);
+}
+
+namespace {
+
+ class PPCLoopPreIncPrep : public FunctionPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ PPCLoopPreIncPrep() : FunctionPass(ID), TM(nullptr) {
+ initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
+ }
+ PPCLoopPreIncPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
+ initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolution>();
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ bool runOnLoop(Loop *L);
+ void simplifyLoopLatch(Loop *L);
+ bool rotateLoop(Loop *L);
+
+ private:
+ PPCTargetMachine *TM;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ };
+}
+
+char PPCLoopPreIncPrep::ID = 0;
+static const char *name = "Prepare loop for pre-inc. addressing modes";
+INITIALIZE_PASS_BEGIN(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
+
+FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) {
+ return new PPCLoopPreIncPrep(TM);
+}
+
+namespace {
+ struct SCEVLess : std::binary_function<const SCEV *, const SCEV *, bool>
+ {
+ SCEVLess(ScalarEvolution *SE) : SE(SE) {}
+
+ bool operator() (const SCEV *X, const SCEV *Y) const {
+ const SCEV *Diff = SE->getMinusSCEV(X, Y);
+ return cast<SCEVConstant>(Diff)->getValue()->getSExtValue() < 0;
+ }
+
+ protected:
+ ScalarEvolution *SE;
+ };
+}
+
+static bool IsPtrInBounds(Value *BasePtr) {
+ Value *StrippedBasePtr = BasePtr;
+ while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr))
+ StrippedBasePtr = BC->getOperand(0);
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(StrippedBasePtr))
+ return GEP->isInBounds();
+
+ return false;
+}
+
+static Value *GetPointerOperand(Value *MemI) {
+ if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) {
+ return LMemI->getPointerOperand();
+ } else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) {
+ return SMemI->getPointerOperand();
+ } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
+ if (IMemI->getIntrinsicID() == Intrinsic::prefetch)
+ return IMemI->getArgOperand(0);
+ }
+
+ return 0;
+}
+
+bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SE = &getAnalysis<ScalarEvolution>();
+
+ bool MadeChange = false;
+
+ for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I)
+ for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L)
+ MadeChange |= runOnLoop(*L);
+
+ return MadeChange;
+}
+
+bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
+ bool MadeChange = false;
+
+ // Only prep. the inner-most loop
+ if (!L->empty())
+ return MadeChange;
+
+ DEBUG(dbgs() << "PIP: Examining: " << *L << "\n");
+
+ BasicBlock *Header = L->getHeader();
+
+ const PPCSubtarget *ST =
+ TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr;
+
+ unsigned HeaderLoopPredCount =
+ std::distance(pred_begin(Header), pred_end(Header));
+
+ // Collect buckets of comparable addresses used by loads and stores.
+ typedef std::multimap<const SCEV *, Instruction *, SCEVLess> Bucket;
+ SmallVector<Bucket, 16> Buckets;
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I) {
+ for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
+ J != JE; ++J) {
+ Value *PtrValue;
+ Instruction *MemI;
+
+ if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
+ MemI = LMemI;
+ PtrValue = LMemI->getPointerOperand();
+ } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
+ MemI = SMemI;
+ PtrValue = SMemI->getPointerOperand();
+ } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(J)) {
+ if (IMemI->getIntrinsicID() == Intrinsic::prefetch) {
+ MemI = IMemI;
+ PtrValue = IMemI->getArgOperand(0);
+ } else continue;
+ } else continue;
+
+ unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
+ if (PtrAddrSpace)
+ continue;
+
+ // There are no update forms for Altivec vector load/stores.
+ if (ST && ST->hasAltivec() &&
+ PtrValue->getType()->getPointerElementType()->isVectorTy())
+ continue;
+
+ if (L->isLoopInvariant(PtrValue))
+ continue;
+
+ const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);
+ if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) {
+ if (LARSCEV->getLoop() != L)
+ continue;
+ } else {
+ continue;
+ }
+
+ bool FoundBucket = false;
+ for (unsigned i = 0, e = Buckets.size(); i != e; ++i)
+ for (Bucket::iterator K = Buckets[i].begin(), KE = Buckets[i].end();
+ K != KE; ++K) {
+ const SCEV *Diff = SE->getMinusSCEV(K->first, LSCEV);
+ if (isa<SCEVConstant>(Diff)) {
+ Buckets[i].insert(std::make_pair(LSCEV, MemI));
+ FoundBucket = true;
+ break;
+ }
+ }
+
+ if (!FoundBucket) {
+ Buckets.push_back(Bucket(SCEVLess(SE)));
+ Buckets[Buckets.size()-1].insert(std::make_pair(LSCEV, MemI));
+ }
+ }
+ }
+
+ if (Buckets.empty() || Buckets.size() > MaxVars)
+ return MadeChange;
+
+ BasicBlock *LoopPredecessor = L->getLoopPredecessor();
+ // If there is no loop predecessor, or the loop predecessor's terminator
+ // returns a value (which might contribute to determining the loop's
+ // iteration space), insert a new preheader for the loop.
+ if (!LoopPredecessor ||
+ !LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
+ LoopPredecessor = InsertPreheaderForLoop(L, this);
+ if (LoopPredecessor)
+ MadeChange = true;
+ }
+ if (!LoopPredecessor)
+ return MadeChange;
+
+ DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n");
+
+ SmallSet<BasicBlock *, 16> BBChanged;
+ for (unsigned i = 0, e = Buckets.size(); i != e; ++i) {
+ // The base address of each bucket is transformed into a phi and the others
+ // are rewritten as offsets of that variable.
+
+ const SCEVAddRecExpr *BasePtrSCEV =
+ cast<SCEVAddRecExpr>(Buckets[i].begin()->first);
+ if (!BasePtrSCEV->isAffine())
+ continue;
+
+ DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
+ assert(BasePtrSCEV->getLoop() == L &&
+ "AddRec for the wrong loop?");
+
+ Instruction *MemI = Buckets[i].begin()->second;
+ Value *BasePtr = GetPointerOperand(MemI);
+ assert(BasePtr && "No pointer operand");
+
+ Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext());
+ Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
+ BasePtr->getType()->getPointerAddressSpace());
+
+ const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart();
+ if (!SE->isLoopInvariant(BasePtrStartSCEV, L))
+ continue;
+
+ const SCEVConstant *BasePtrIncSCEV =
+ dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
+ if (!BasePtrIncSCEV)
+ continue;
+ BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV);
+ if (!isSafeToExpand(BasePtrStartSCEV, *SE))
+ continue;
+
+ DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
+
+ PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
+ MemI->hasName() ? MemI->getName() + ".phi" : "",
+ Header->getFirstNonPHI());
+
+ SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart");
+ Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
+ LoopPredecessor->getTerminator());
+
+ // Note that LoopPredecessor might occur in the predecessor list multiple
+ // times, and we need to add it the right number of times.
+ for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
+ PI != PE; ++PI) {
+ if (*PI != LoopPredecessor)
+ continue;
+
+ NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
+ }
+
+ Instruction *InsPoint = Header->getFirstInsertionPt();
+ GetElementPtrInst *PtrInc = GetElementPtrInst::Create(
+ I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
+ MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint);
+ PtrInc->setIsInBounds(IsPtrInBounds(BasePtr));
+ for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
+ PI != PE; ++PI) {
+ if (*PI == LoopPredecessor)
+ continue;
+
+ NewPHI->addIncoming(PtrInc, *PI);
+ }
+
+ Instruction *NewBasePtr;
+ if (PtrInc->getType() != BasePtr->getType())
+ NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(),
+ PtrInc->hasName() ? PtrInc->getName() + ".cast" : "", InsPoint);
+ else
+ NewBasePtr = PtrInc;
+
+ if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
+ BBChanged.insert(IDel->getParent());
+ BasePtr->replaceAllUsesWith(NewBasePtr);
+ RecursivelyDeleteTriviallyDeadInstructions(BasePtr);
+
+ Value *LastNewPtr = NewBasePtr;
+ for (Bucket::iterator I = std::next(Buckets[i].begin()),
+ IE = Buckets[i].end(); I != IE; ++I) {
+ Value *Ptr = GetPointerOperand(I->second);
+ assert(Ptr && "No pointer operand");
+ if (Ptr == LastNewPtr)
+ continue;
+
+ Instruction *RealNewPtr;
+ const SCEVConstant *Diff =
+ cast<SCEVConstant>(SE->getMinusSCEV(I->first, BasePtrSCEV));
+ if (Diff->isZero()) {
+ RealNewPtr = NewBasePtr;
+ } else {
+ Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
+ if (PtrIP && isa<Instruction>(NewBasePtr) &&
+ cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
+ PtrIP = 0;
+ else if (isa<PHINode>(PtrIP))
+ PtrIP = PtrIP->getParent()->getFirstInsertionPt();
+ else if (!PtrIP)
+ PtrIP = I->second;
+
+ GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
+ I8Ty, PtrInc, Diff->getValue(),
+ I->second->hasName() ? I->second->getName() + ".off" : "", PtrIP);
+ if (!PtrIP)
+ NewPtr->insertAfter(cast<Instruction>(PtrInc));
+ NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
+ RealNewPtr = NewPtr;
+ }
+
+ if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
+ BBChanged.insert(IDel->getParent());
+
+ Instruction *ReplNewPtr;
+ if (Ptr->getType() != RealNewPtr->getType()) {
+ ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
+ Ptr->hasName() ? Ptr->getName() + ".cast" : "");
+ ReplNewPtr->insertAfter(RealNewPtr);
+ } else
+ ReplNewPtr = RealNewPtr;
+
+ Ptr->replaceAllUsesWith(ReplNewPtr);
+ RecursivelyDeleteTriviallyDeadInstructions(Ptr);
+
+ LastNewPtr = RealNewPtr;
+ }
+
+ MadeChange = true;
+ }
+
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I) {
+ if (BBChanged.count(*I))
+ DeleteDeadPHIs(*I);
+ }
+
+ return MadeChange;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index c417199548ad..f1e28651aea2 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -38,7 +38,7 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
const TargetMachine &TM = AP.TM;
Mangler *Mang = AP.Mang;
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
MCContext &Ctx = AP.OutContext;
bool isDarwin = Triple(TM.getTargetTriple()).isOSDarwin();
@@ -66,7 +66,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
unsigned OrigLen = Name.size() - PrefixLen;
Name += Suffix;
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen);
// If the target flags on the operand changes the name of the symbol, do that
@@ -85,7 +85,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
} else {
StubSym =
MachineModuleInfoImpl::
- StubValueTy(Ctx.GetOrCreateSymbol(OrigName), false);
+ StubValueTy(Ctx.getOrCreateSymbol(OrigName), false);
}
return Sym;
}
@@ -137,12 +137,6 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
case PPCII::MO_TLS:
RefKind = MCSymbolRefExpr::VK_PPC_TLS;
break;
- case PPCII::MO_TLSGD:
- RefKind = MCSymbolRefExpr::VK_PPC_TLSGD;
- break;
- case PPCII::MO_TLSLD:
- RefKind = MCSymbolRefExpr::VK_PPC_TLSLD;
- break;
}
if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin)
@@ -173,7 +167,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
break;
}
- return MCOperand::CreateExpr(Expr);
+ return MCOperand::createExpr(Expr);
}
void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
@@ -190,13 +184,16 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
assert(!MO.getSubReg() && "Subregs should be eliminated!");
- MCOp = MCOperand::CreateReg(MO.getReg());
+ assert(MO.getReg() > PPC::NoRegister &&
+ MO.getReg() < PPC::NUM_TARGET_REGS &&
+ "Invalid register for this target!");
+ MCOp = MCOperand::createReg(MO.getReg());
break;
case MachineOperand::MO_Immediate:
- MCOp = MCOperand::CreateImm(MO.getImm());
+ MCOp = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(
MO.getMBB()->getSymbol(), AP.OutContext));
break;
case MachineOperand::MO_GlobalAddress:
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 4aff95a8a657..ec4e0a5fa81b 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -18,7 +18,8 @@ using namespace llvm;
void PPCFunctionInfo::anchor() { }
MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
- const DataLayout *DL = MF.getSubtarget().getDataLayout();
- return MF.getContext().GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
- Twine(MF.getFunctionNumber())+"$poff");
+ const DataLayout *DL = MF.getTarget().getDataLayout();
+ return MF.getContext().getOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) +
+ Twine(MF.getFunctionNumber()) +
+ "$poff");
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 37b2ff83c45f..607cdf612eef 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -62,6 +62,9 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// entry, even though LR may otherwise apparently not be used.
bool LRStoreRequired;
+ /// This function makes use of the PPC64 ELF TOC base pointer (register r2).
+ bool UsesTOCBasePtr;
+
/// MinReservedArea - This is the frame size that is at least reserved in a
/// potential caller (parameter+linkage area).
unsigned MinReservedArea;
@@ -112,6 +115,7 @@ public:
SpillsCR(false),
SpillsVRSAVE(false),
LRStoreRequired(false),
+ UsesTOCBasePtr(false),
MinReservedArea(0),
TailCallSPDelta(0),
HasFastCall(false),
@@ -164,6 +168,9 @@ public:
void setLRStoreRequired() { LRStoreRequired = true; }
bool isLRStoreRequired() const { return LRStoreRequired; }
+ void setUsesTOCBasePtr() { UsesTOCBasePtr = true; }
+ bool usesTOCBasePtr() const { return UsesTOCBasePtr; }
+
void setHasFastCall() { HasFastCall = true; }
bool hasFastCall() const { return HasFastCall;}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index aa0da7a97b14..656376c641aa 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -18,6 +18,7 @@
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -56,11 +57,11 @@ static cl::opt<bool>
AlwaysBasePointer("ppc-always-use-base-pointer", cl::Hidden, cl::init(false),
cl::desc("Force the use of a base pointer in every function"));
-PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST)
- : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
- ST.isPPC64() ? 0 : 1,
- ST.isPPC64() ? 0 : 1),
- Subtarget(ST) {
+PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
+ : PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR,
+ TM.isPPC64() ? 0 : 1,
+ TM.isPPC64() ? 0 : 1),
+ TM(TM) {
ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
@@ -87,18 +88,19 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
// Note that PPCInstrInfo::FoldImmediate also directly uses this Kind value
// when it checks for ZERO folding.
if (Kind == 1) {
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
return &PPC::G8RC_NOX0RegClass;
return &PPC::GPRC_NOR0RegClass;
}
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
return &PPC::G8RCRegClass;
return &PPC::GPRCRegClass;
}
const MCPhysReg*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>();
if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) {
if (Subtarget.hasVSX())
return CSR_64_AllRegs_VSX_SaveList;
@@ -108,23 +110,28 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- CSR_Darwin64_Altivec_SaveList :
- CSR_Darwin64_SaveList) :
- (Subtarget.hasAltivec() ?
- CSR_Darwin32_Altivec_SaveList :
- CSR_Darwin32_SaveList);
-
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- CSR_SVR464_Altivec_SaveList :
- CSR_SVR464_SaveList) :
- (Subtarget.hasAltivec() ?
- CSR_SVR432_Altivec_SaveList :
- CSR_SVR432_SaveList);
+ return TM.isPPC64()
+ ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_SaveList
+ : CSR_Darwin64_SaveList)
+ : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_SaveList
+ : CSR_Darwin32_SaveList);
+
+ // On PPC64, we might need to save r2 (but only if it is not reserved).
+ bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
+
+ return TM.isPPC64()
+ ? (Subtarget.hasAltivec()
+ ? (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
+ : CSR_SVR464_Altivec_SaveList)
+ : (SaveR2 ? CSR_SVR464_R2_SaveList : CSR_SVR464_SaveList))
+ : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_SaveList
+ : CSR_SVR432_SaveList);
}
-const uint32_t*
-PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (CC == CallingConv::AnyReg) {
if (Subtarget.hasVSX())
return CSR_64_AllRegs_VSX_RegMask;
@@ -134,19 +141,15 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
}
if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- CSR_Darwin64_Altivec_RegMask :
- CSR_Darwin64_RegMask) :
- (Subtarget.hasAltivec() ?
- CSR_Darwin32_Altivec_RegMask :
- CSR_Darwin32_RegMask);
-
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- CSR_SVR464_Altivec_RegMask :
- CSR_SVR464_RegMask) :
- (Subtarget.hasAltivec() ?
- CSR_SVR432_Altivec_RegMask :
- CSR_SVR432_RegMask);
+ return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_RegMask
+ : CSR_Darwin64_RegMask)
+ : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask
+ : CSR_Darwin32_RegMask);
+
+ return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask
+ : CSR_SVR464_RegMask)
+ : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask
+ : CSR_SVR432_RegMask);
}
const uint32_t*
@@ -155,17 +158,15 @@ PPCRegisterInfo::getNoPreservedMask() const {
}
void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
- unsigned PseudoRegs[] = { PPC::ZERO, PPC::ZERO8, PPC::RM };
- for (unsigned i = 0, ie = array_lengthof(PseudoRegs); i != ie; ++i) {
- unsigned Reg = PseudoRegs[i];
- Mask[Reg / 32] &= ~(1u << (Reg % 32));
- }
+ for (unsigned PseudoReg : {PPC::ZERO, PPC::ZERO8, PPC::RM})
+ Mask[PseudoReg / 32] &= ~(1u << (PseudoReg % 32));
}
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(
- MF.getSubtarget().getFrameLowering());
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const PPCFrameLowering *PPCFI =
+ static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering());
// The ZERO register is not really a register, but the representation of r0
// when used in instructions that treat r0 as the constant 0.
@@ -202,7 +203,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
// On PPC64, r13 is the thread pointer. Never allocate this register.
- if (Subtarget.isPPC64()) {
+ if (TM.isPPC64()) {
Reserved.set(PPC::R13);
Reserved.set(PPC::X1);
@@ -216,7 +217,16 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
if (Subtarget.isSVR4ABI()) {
- Reserved.set(PPC::X2);
+ // We only reserve r2 if we need to use the TOC pointer. If we have no
+ // explicit uses of the TOC pointer (meaning we're a leaf function with
+ // no constant-pool loads, etc.) and we have no potential uses inside an
+ // inline asm block, then we can treat r2 has an ordinary callee-saved
+ // register.
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ if (FuncInfo->usesTOCBasePtr() || MF.hasInlineAsm())
+ Reserved.set(PPC::X2);
+ else
+ Reserved.reset(PPC::R2);
}
}
@@ -224,15 +234,15 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R31);
if (hasBasePointer(MF)) {
- if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
- MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ if (Subtarget.isSVR4ABI() && !TM.isPPC64() &&
+ TM.getRelocationModel() == Reloc::PIC_)
Reserved.set(PPC::R29);
else
Reserved.set(PPC::R30);
}
- if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
- MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ if (Subtarget.isSVR4ABI() && !TM.isPPC64() &&
+ TM.getRelocationModel() == Reloc::PIC_)
Reserved.set(PPC::R30);
// Reserve Altivec registers when Altivec is unavailable.
@@ -244,10 +254,10 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-unsigned
-PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
const unsigned DefaultSafety = 1;
switch (RC->getID()) {
@@ -262,6 +272,9 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
case PPC::F8RCRegClassID:
case PPC::F4RCRegClassID:
+ case PPC::QFRCRegClassID:
+ case PPC::QSRCRegClassID:
+ case PPC::QBRCRegClassID:
case PPC::VRRCRegClassID:
case PPC::VFRCRegClassID:
case PPC::VSLRCRegClassID:
@@ -269,14 +282,17 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
return 32 - DefaultSafety;
case PPC::VSRCRegClassID:
case PPC::VSFRCRegClassID:
+ case PPC::VSSRCRegClassID:
return 64 - DefaultSafety;
case PPC::CRRCRegClassID:
return 8 - DefaultSafety;
}
}
-const TargetRegisterClass*
-PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)const {
+const TargetRegisterClass *
+PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (Subtarget.hasVSX()) {
// With VSX, we can inflate various sub-register classes to the full VSX
// register set.
@@ -285,9 +301,11 @@ PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)const {
return &PPC::VSFRCRegClass;
else if (RC == &PPC::VRRCRegClass)
return &PPC::VSRCRegClass;
+ else if (RC == &PPC::F4RCRegClass && Subtarget.hasP8Vector())
+ return &PPC::VSSRCRegClass;
}
- return TargetRegisterInfo::getLargestLegalSuperClass(RC);
+ return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
}
//===----------------------------------------------------------------------===//
@@ -310,10 +328,11 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
MachineFunction &MF = *MBB.getParent();
// Get the frame info.
MachineFrameInfo *MFI = MF.getFrameInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// Get the instruction info.
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
// Determine whether 64-bit pointers are used.
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
DebugLoc dl = MI.getDebugLoc();
// Get the maximum call stack size.
@@ -322,10 +341,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
unsigned FrameSize = MFI->getStackSize();
// Get stack alignments.
- unsigned TargetAlign = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
+ unsigned TargetAlign = Subtarget.getFrameLowering()->getStackAlignment();
unsigned MaxAlign = MFI->getMaxAlignment();
assert((maxCallFrameSize & (MaxAlign-1)) == 0 &&
"Maximum call-frame size not sufficiently aligned");
@@ -430,10 +446,11 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -474,10 +491,11 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -509,37 +527,6 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
MBB.erase(II);
}
-static unsigned getCRFromCRBit(unsigned SrcReg) {
- unsigned Reg = 0;
- if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
- SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
- Reg = PPC::CR0;
- else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
- SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
- Reg = PPC::CR1;
- else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
- SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
- Reg = PPC::CR2;
- else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
- SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
- Reg = PPC::CR3;
- else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
- SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
- Reg = PPC::CR4;
- else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
- SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
- Reg = PPC::CR5;
- else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
- SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
- Reg = PPC::CR6;
- else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
- SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
- Reg = PPC::CR7;
-
- assert(Reg != 0 && "Invalid CR bit register");
- return Reg;
-}
-
void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex) const {
// Get the instruction.
@@ -547,10 +534,11 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -590,10 +578,11 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -637,7 +626,8 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -662,7 +652,8 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -684,14 +675,14 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
bool
PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
unsigned Reg, int &FrameIdx) const {
-
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
// ABI, return true to prevent allocating an additional frame slot.
// For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
// is arbitrary and will be subsequently ignored. For 32-bit, we have
// previously created the stack slot if needed, so return its FrameIdx.
if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
FrameIdx = 0;
else {
const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -744,8 +735,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineBasicBlock &MBB = *MI.getParent();
// Get the basic block's function.
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// Get the instruction info.
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
// Get the frame info.
MachineFrameInfo *MFI = MF.getFrameInfo();
DebugLoc dl = MI.getDebugLoc();
@@ -811,8 +803,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// to Offset to get the correct offset.
// Naked functions have stack size 0, although getStackSize may not reflect that
// because we didn't call all the pieces that compute it for naked functions.
- if (!MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked)) {
+ if (!MF.getFunction()->hasFnAttribute(Attribute::Naked)) {
if (!(hasBasePointer(MF) && FrameIndex < 0))
Offset += MFI->getStackSize();
}
@@ -835,7 +826,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
- bool is64Bit = Subtarget.isPPC64();
+ bool is64Bit = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
@@ -873,23 +864,25 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
- if (!Subtarget.isPPC64())
+ if (!TM.isPPC64())
return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
else
return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
}
unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (!hasBasePointer(MF))
return getFrameRegister(MF);
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
return PPC::X30;
if (Subtarget.isSVR4ABI() &&
- MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ TM.getRelocationModel() == Reloc::PIC_)
return PPC::R29;
return PPC::R30;
@@ -915,16 +908,12 @@ bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const {
}
bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackAlign = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
- bool requiresRealignment =
- ((MFI->getMaxAlignment() > StackAlign) ||
- F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackAlignment));
+ unsigned StackAlign = Subtarget.getFrameLowering()->getStackAlignment();
+ bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
+ F->hasFnAttribute(Attribute::StackAlignment));
return requiresRealignment && canRealignStack(MF);
}
@@ -957,9 +946,9 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
-
- const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(
- MF.getSubtarget().getFrameLowering());
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const PPCFrameLowering *PPCFI =
+ static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering());
unsigned StackEst =
PPCFI->determineFrameLayout(MF, false, true);
@@ -976,7 +965,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// The frame pointer will point to the end of the stack, so estimate the
// offset as the difference between the object offset and the FP location.
- return !isFrameOffsetLegal(MI, Offset);
+ return !isFrameOffsetLegal(MI, getBaseRegister(MF), Offset);
}
/// Insert defining instruction(s) for BaseReg to
@@ -985,7 +974,7 @@ void PPCRegisterInfo::
materializeFrameBaseRegister(MachineBasicBlock *MBB,
unsigned BaseReg, int FrameIdx,
int64_t Offset) const {
- unsigned ADDriOpc = Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDriOpc = TM.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
MachineBasicBlock::iterator Ins = MBB->begin();
DebugLoc DL; // Defaults to "unknown"
@@ -993,7 +982,8 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
DL = Ins->getDebugLoc();
const MachineFunction &MF = *MBB->getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
@@ -1018,7 +1008,8 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const MCInstrDesc &MCID = MI.getDesc();
MachineRegisterInfo &MRI = MF.getRegInfo();
MRI.constrainRegClass(BaseReg,
@@ -1026,6 +1017,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
}
bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ unsigned BaseReg,
int64_t Offset) const {
unsigned FIOperandNum = 0;
while (!MI->getOperand(FIOperandNum).isFI()) {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 4c2ef9067cb2..d304e1d8b5ec 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -22,15 +22,44 @@
#include "PPCGenRegisterInfo.inc"
namespace llvm {
-class PPCSubtarget;
-class TargetInstrInfo;
-class Type;
+
+inline static unsigned getCRFromCRBit(unsigned SrcReg) {
+ unsigned Reg = 0;
+ if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
+ SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
+ SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
+ SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
+ SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
+ SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
+ SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
+ SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
+ SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ assert(Reg != 0 && "Invalid CR bit register");
+ return Reg;
+}
+
class PPCRegisterInfo : public PPCGenRegisterInfo {
DenseMap<unsigned, unsigned> ImmToIdxMap;
- const PPCSubtarget &Subtarget;
+ const PPCTargetMachine &TM;
public:
- PPCRegisterInfo(const PPCSubtarget &SubTarget);
+ PPCRegisterInfo(const PPCTargetMachine &TM);
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
@@ -40,13 +69,14 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
- const TargetRegisterClass*
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+ const TargetRegisterClass *
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction* MF =nullptr) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const override;
const uint32_t *getNoPreservedMask() const;
void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
@@ -97,7 +127,7 @@ public:
int64_t Offset) const override;
void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI,
+ bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const override;
// Debug information queries.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 2fdf04eaaeee..e5f363c443cd 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -49,6 +49,13 @@ class FPR<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
+// QFPR - One of the 32 256-bit floating-point vector registers (used for QPX)
+class QFPR<FPR SubReg, string n> : PPCReg<n> {
+ let HWEncoding = SubReg.HWEncoding;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_64];
+}
+
// VF - One of the 32 64-bit floating-point subregisters of the vector
// registers (used by VSX).
class VF<bits<5> num, string n> : PPCReg<n> {
@@ -114,6 +121,12 @@ foreach Index = 0-31 in {
def VF#Index : VF<Index, "vs" # !add(Index, 32)>;
}
+// QPX Floating-point registers
+foreach Index = 0-31 in {
+ def QF#Index : QFPR<!cast<FPR>("F"#Index), "q"#Index>,
+ DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
+}
+
// Vector registers
foreach Index = 0-31 in {
def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
@@ -219,17 +232,50 @@ def RM: PPCReg<"**ROUNDING MODE**">;
// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
(sequence "R%u", 30, 13),
- R31, R0, R1, FP, BP)>;
+ R31, R0, R1, FP, BP)> {
+ // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
+ // put it at the end of the list.
+ let AltOrders = [(add (sub GPRC, R2), R2)];
+ let AltOrderSelect = [{
+ const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
+ return S.isPPC64() && S.isSVR4ABI();
+ }];
+}
def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
(sequence "X%u", 30, 14),
- X31, X13, X0, X1, FP8, BP8)>;
+ X31, X13, X0, X1, FP8, BP8)> {
+ // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
+ // put it at the end of the list.
+ let AltOrders = [(add (sub G8RC, X2), X2)];
+ let AltOrderSelect = [{
+ const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
+ return S.isPPC64() && S.isSVR4ABI();
+ }];
+}
// For some instructions r0 is special (representing the value 0 instead of
// the value in the r0 register), and we use these register subclasses to
// prevent r0 from being allocated for use by those instructions.
-def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>;
-def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>;
+def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)> {
+ // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
+ // put it at the end of the list.
+ let AltOrders = [(add (sub GPRC_NOR0, R2), R2)];
+ let AltOrderSelect = [{
+ const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
+ return S.isPPC64() && S.isSVR4ABI();
+ }];
+}
+
+def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)> {
+ // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
+ // put it at the end of the list.
+ let AltOrders = [(add (sub G8RC_NOX0, X2), X2)];
+ let AltOrderSelect = [{
+ const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
+ return S.isPPC64() && S.isSVR4ABI();
+ }];
+}
// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
// ABI the size of the Floating-point register save area is determined by the
@@ -242,7 +288,7 @@ def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
(sequence "F%u", 31, 14))>;
def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
-def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
+def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32], 128,
(add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
@@ -270,6 +316,19 @@ def VFRC : RegisterClass<"PPC", [f64], 64,
VF22, VF21, VF20)>;
def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;
+// Register class for single precision scalars in VSX registers
+def VSSRC : RegisterClass<"PPC", [f32], 32, (add VSFRC)>;
+
+// For QPX
+def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13),
+ (sequence "QF%u", 31, 14))>;
+def QSRC : RegisterClass<"PPC", [v4f32], 128, (add QFRC)>;
+def QBRC : RegisterClass<"PPC", [v4i1], 256, (add QFRC)> {
+ // These are actually stored as floating-point values where a positive
+ // number is true and anything else (including NaN) is false.
+ let Size = 256;
+}
+
def CRBITRC : RegisterClass<"PPC", [i1], 32,
(add CR2LT, CR2GT, CR2EQ, CR2UN,
CR3LT, CR3GT, CR3EQ, CR3UN,
@@ -285,6 +344,8 @@ def CRBITRC : RegisterClass<"PPC", [i1], 32,
def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
CR7, CR2, CR3, CR4)>;
+def CRRC0 : RegisterClass<"PPC", [i32], 32, (add CR0)>;
+
// The CTR registers are not allocatable because they're used by the
// decrement-and-branch instructions, and thus need to stay live across
// multiple basic blocks.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
index 3ae3793f1a8b..d0954a11cd6a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -13,6 +13,7 @@
def IIC_IntSimple : InstrItinClass;
def IIC_IntGeneral : InstrItinClass;
def IIC_IntCompare : InstrItinClass;
+def IIC_IntISEL : InstrItinClass;
def IIC_IntDivD : InstrItinClass;
def IIC_IntDivW : InstrItinClass;
def IIC_IntMFFS : InstrItinClass;
@@ -123,400 +124,3 @@ include "PPCScheduleP8.td"
include "PPCScheduleA2.td"
include "PPCScheduleE500mc.td"
include "PPCScheduleE5500.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction to itinerary class map - When add new opcodes to the supported
-// set, refer to the following table to determine which itinerary class the
-// opcode belongs.
-//
-// opcode itinerary class
-// ====== ===============
-// add IIC_IntSimple
-// addc IIC_IntGeneral
-// adde IIC_IntGeneral
-// addi IIC_IntSimple
-// addic IIC_IntGeneral
-// addic. IIC_IntGeneral
-// addis IIC_IntSimple
-// addme IIC_IntGeneral
-// addze IIC_IntGeneral
-// and IIC_IntSimple
-// andc IIC_IntSimple
-// andi. IIC_IntGeneral
-// andis. IIC_IntGeneral
-// b IIC_BrB
-// bc IIC_BrB
-// bcctr IIC_BrB
-// bclr IIC_BrB
-// cmp IIC_IntCompare
-// cmpi IIC_IntCompare
-// cmpl IIC_IntCompare
-// cmpli IIC_IntCompare
-// cntlzd IIC_IntRotateD
-// cntlzw IIC_IntGeneral
-// crand IIC_BrCR
-// crandc IIC_BrCR
-// creqv IIC_BrCR
-// crnand IIC_BrCR
-// crnor IIC_BrCR
-// cror IIC_BrCR
-// crorc IIC_BrCR
-// crxor IIC_BrCR
-// dcba IIC_LdStDCBA
-// dcbf IIC_LdStDCBF
-// dcbi IIC_LdStDCBI
-// dcbst IIC_LdStDCBF
-// dcbt IIC_LdStLoad
-// dcbtst IIC_LdStLoad
-// dcbz IIC_LdStDCBF
-// divd IIC_IntDivD
-// divdu IIC_IntDivD
-// divw IIC_IntDivW
-// divwu IIC_IntDivW
-// dss IIC_LdStDSS
-// dst IIC_LdStDSS
-// dstst IIC_LdStDSS
-// eciwx IIC_LdStLoad
-// ecowx IIC_LdStLoad
-// eieio IIC_LdStLoad
-// eqv IIC_IntSimple
-// extsb IIC_IntSimple
-// extsh IIC_IntSimple
-// extsw IIC_IntSimple
-// fabs IIC_FPGeneral
-// fadd IIC_FPAddSub
-// fadds IIC_FPGeneral
-// fcfid IIC_FPGeneral
-// fcmpo IIC_FPCompare
-// fcmpu IIC_FPCompare
-// fctid IIC_FPGeneral
-// fctidz IIC_FPGeneral
-// fctiw IIC_FPGeneral
-// fctiwz IIC_FPGeneral
-// fdiv IIC_FPDivD
-// fdivs IIC_FPDivS
-// fmadd IIC_FPFused
-// fmadds IIC_FPGeneral
-// fmr IIC_FPGeneral
-// fmsub IIC_FPFused
-// fmsubs IIC_FPGeneral
-// fmul IIC_FPFused
-// fmuls IIC_FPGeneral
-// fnabs IIC_FPGeneral
-// fneg IIC_FPGeneral
-// fnmadd IIC_FPFused
-// fnmadds IIC_FPGeneral
-// fnmsub IIC_FPFused
-// fnmsubs IIC_FPGeneral
-// fres IIC_FPRes
-// frsp IIC_FPGeneral
-// frsqrte IIC_FPGeneral
-// fsel IIC_FPGeneral
-// fsqrt IIC_FPSqrtD
-// fsqrts IIC_FPSqrtS
-// fsub IIC_FPAddSub
-// fsubs IIC_FPGeneral
-// icbi IIC_LdStICBI
-// isync IIC_SprISYNC
-// lbz IIC_LdStLoad
-// lbzu IIC_LdStLoadUpd
-// lbzux IIC_LdStLoadUpdX
-// lbzx IIC_LdStLoad
-// ld IIC_LdStLD
-// ldarx IIC_LdStLDARX
-// ldu IIC_LdStLDU
-// ldux IIC_LdStLDUX
-// ldx IIC_LdStLD
-// lfd IIC_LdStLFD
-// lfdu IIC_LdStLFDU
-// lfdux IIC_LdStLFDUX
-// lfdx IIC_LdStLFD
-// lfs IIC_LdStLFD
-// lfsu IIC_LdStLFDU
-// lfsux IIC_LdStLFDUX
-// lfsx IIC_LdStLFD
-// lha IIC_LdStLHA
-// lhau IIC_LdStLHAU
-// lhaux IIC_LdStLHAUX
-// lhax IIC_LdStLHA
-// lhbrx IIC_LdStLoad
-// lhz IIC_LdStLoad
-// lhzu IIC_LdStLoadUpd
-// lhzux IIC_LdStLoadUpdX
-// lhzx IIC_LdStLoad
-// lmw IIC_LdStLMW
-// lswi IIC_LdStLMW
-// lswx IIC_LdStLMW
-// lvebx IIC_LdStLVecX
-// lvehx IIC_LdStLVecX
-// lvewx IIC_LdStLVecX
-// lvsl IIC_LdStLVecX
-// lvsr IIC_LdStLVecX
-// lvx IIC_LdStLVecX
-// lvxl IIC_LdStLVecX
-// lwa IIC_LdStLWA
-// lwarx IIC_LdStLWARX
-// lwaux IIC_LdStLHAUX
-// lwax IIC_LdStLHA
-// lwbrx IIC_LdStLoad
-// lwz IIC_LdStLoad
-// lwzu IIC_LdStLoadUpd
-// lwzux IIC_LdStLoadUpdX
-// lwzx IIC_LdStLoad
-// mcrf IIC_BrMCR
-// mcrfs IIC_FPGeneral
-// mcrxr IIC_BrMCRX
-// mfcr IIC_SprMFCR
-// mffs IIC_IntMFFS
-// mfmsr IIC_SprMFMSR
-// mfspr IIC_SprMFSPR
-// mfsr IIC_SprMFSR
-// mfsrin IIC_SprMFSR
-// mftb IIC_SprMFTB
-// mfvscr IIC_IntMFVSCR
-// mtcrf IIC_BrMCRX
-// mtfsb0 IIC_IntMTFSB0
-// mtfsb1 IIC_IntMTFSB0
-// mtfsf IIC_IntMTFSB0
-// mtfsfi IIC_IntMTFSB0
-// mtmsr IIC_SprMTMSR
-// mtmsrd IIC_LdStLD
-// mtspr IIC_SprMTSPR
-// mtsr IIC_SprMTSR
-// mtsrd IIC_IntMTSRD
-// mtsrdin IIC_IntMTSRD
-// mtsrin IIC_SprMTSRIN
-// mtvscr IIC_IntMFVSCR
-// mulhd IIC_IntMulHD
-// mulhdu IIC_IntMulHD
-// mulhw IIC_IntMulHW
-// mulhwu IIC_IntMulHWU
-// mulld IIC_IntMulHD
-// mulli IIC_IntMulLI
-// mullw IIC_IntMulHW
-// nand IIC_IntSimple
-// neg IIC_IntSimple
-// nor IIC_IntSimple
-// or IIC_IntSimple
-// orc IIC_IntSimple
-// ori IIC_IntSimple
-// oris IIC_IntSimple
-// rfi IIC_SprRFI
-// rfid IIC_IntRFID
-// rldcl IIC_IntRotateD
-// rldcr IIC_IntRotateD
-// rldic IIC_IntRotateDI
-// rldicl IIC_IntRotateDI
-// rldicr IIC_IntRotateDI
-// rldimi IIC_IntRotateDI
-// rlwimi IIC_IntRotate
-// rlwinm IIC_IntGeneral
-// rlwnm IIC_IntGeneral
-// sc IIC_SprSC
-// slbia IIC_LdStSLBIA
-// slbie IIC_LdStSLBIE
-// sld IIC_IntRotateD
-// slw IIC_IntGeneral
-// srad IIC_IntRotateD
-// sradi IIC_IntRotateDI
-// sraw IIC_IntShift
-// srawi IIC_IntShift
-// srd IIC_IntRotateD
-// srw IIC_IntGeneral
-// stb IIC_LdStStore
-// stbu IIC_LdStStoreUpd
-// stbux IIC_LdStStoreUpd
-// stbx IIC_LdStStore
-// std IIC_LdStSTD
-// stdcx. IIC_LdStSTDCX
-// stdu IIC_LdStSTDU
-// stdux IIC_LdStSTDUX
-// stdx IIC_LdStSTD
-// stfd IIC_LdStSTFD
-// stfdu IIC_LdStSTFDU
-// stfdux IIC_LdStSTFDU
-// stfdx IIC_LdStSTFD
-// stfiwx IIC_LdStSTFD
-// stfs IIC_LdStSTFD
-// stfsu IIC_LdStSTFDU
-// stfsux IIC_LdStSTFDU
-// stfsx IIC_LdStSTFD
-// sth IIC_LdStStore
-// sthbrx IIC_LdStStore
-// sthu IIC_LdStStoreUpd
-// sthux IIC_LdStStoreUpd
-// sthx IIC_LdStStore
-// stmw IIC_LdStLMW
-// stswi IIC_LdStLMW
-// stswx IIC_LdStLMW
-// stvebx IIC_LdStSTVEBX
-// stvehx IIC_LdStSTVEBX
-// stvewx IIC_LdStSTVEBX
-// stvx IIC_LdStSTVEBX
-// stvxl IIC_LdStSTVEBX
-// stw IIC_LdStStore
-// stwbrx IIC_LdStStore
-// stwcx. IIC_LdStSTWCX
-// stwu IIC_LdStStoreUpd
-// stwux IIC_LdStStoreUpd
-// stwx IIC_LdStStore
-// subf IIC_IntGeneral
-// subfc IIC_IntGeneral
-// subfe IIC_IntGeneral
-// subfic IIC_IntGeneral
-// subfme IIC_IntGeneral
-// subfze IIC_IntGeneral
-// sync IIC_LdStSync
-// td IIC_IntTrapD
-// tdi IIC_IntTrapD
-// tlbia IIC_LdStSLBIA
-// tlbie IIC_LdStDCBF
-// tlbsync IIC_SprTLBSYNC
-// tw IIC_IntTrapW
-// twi IIC_IntTrapW
-// vaddcuw IIC_VecGeneral
-// vaddfp IIC_VecFP
-// vaddsbs IIC_VecGeneral
-// vaddshs IIC_VecGeneral
-// vaddsws IIC_VecGeneral
-// vaddubm IIC_VecGeneral
-// vaddubs IIC_VecGeneral
-// vadduhm IIC_VecGeneral
-// vadduhs IIC_VecGeneral
-// vadduwm IIC_VecGeneral
-// vadduws IIC_VecGeneral
-// vand IIC_VecGeneral
-// vandc IIC_VecGeneral
-// vavgsb IIC_VecGeneral
-// vavgsh IIC_VecGeneral
-// vavgsw IIC_VecGeneral
-// vavgub IIC_VecGeneral
-// vavguh IIC_VecGeneral
-// vavguw IIC_VecGeneral
-// vcfsx IIC_VecFP
-// vcfux IIC_VecFP
-// vcmpbfp IIC_VecFPCompare
-// vcmpeqfp IIC_VecFPCompare
-// vcmpequb IIC_VecGeneral
-// vcmpequh IIC_VecGeneral
-// vcmpequw IIC_VecGeneral
-// vcmpgefp IIC_VecFPCompare
-// vcmpgtfp IIC_VecFPCompare
-// vcmpgtsb IIC_VecGeneral
-// vcmpgtsh IIC_VecGeneral
-// vcmpgtsw IIC_VecGeneral
-// vcmpgtub IIC_VecGeneral
-// vcmpgtuh IIC_VecGeneral
-// vcmpgtuw IIC_VecGeneral
-// vctsxs IIC_VecFP
-// vctuxs IIC_VecFP
-// vexptefp IIC_VecFP
-// vlogefp IIC_VecFP
-// vmaddfp IIC_VecFP
-// vmaxfp IIC_VecFPCompare
-// vmaxsb IIC_VecGeneral
-// vmaxsh IIC_VecGeneral
-// vmaxsw IIC_VecGeneral
-// vmaxub IIC_VecGeneral
-// vmaxuh IIC_VecGeneral
-// vmaxuw IIC_VecGeneral
-// vmhaddshs IIC_VecComplex
-// vmhraddshs IIC_VecComplex
-// vminfp IIC_VecFPCompare
-// vminsb IIC_VecGeneral
-// vminsh IIC_VecGeneral
-// vminsw IIC_VecGeneral
-// vminub IIC_VecGeneral
-// vminuh IIC_VecGeneral
-// vminuw IIC_VecGeneral
-// vmladduhm IIC_VecComplex
-// vmrghb IIC_VecPerm
-// vmrghh IIC_VecPerm
-// vmrghw IIC_VecPerm
-// vmrglb IIC_VecPerm
-// vmrglh IIC_VecPerm
-// vmrglw IIC_VecPerm
-// vmsubfp IIC_VecFP
-// vmsummbm IIC_VecComplex
-// vmsumshm IIC_VecComplex
-// vmsumshs IIC_VecComplex
-// vmsumubm IIC_VecComplex
-// vmsumuhm IIC_VecComplex
-// vmsumuhs IIC_VecComplex
-// vmulesb IIC_VecComplex
-// vmulesh IIC_VecComplex
-// vmuleub IIC_VecComplex
-// vmuleuh IIC_VecComplex
-// vmulosb IIC_VecComplex
-// vmulosh IIC_VecComplex
-// vmuloub IIC_VecComplex
-// vmulouh IIC_VecComplex
-// vnor IIC_VecGeneral
-// vor IIC_VecGeneral
-// vperm IIC_VecPerm
-// vpkpx IIC_VecPerm
-// vpkshss IIC_VecPerm
-// vpkshus IIC_VecPerm
-// vpkswss IIC_VecPerm
-// vpkswus IIC_VecPerm
-// vpkuhum IIC_VecPerm
-// vpkuhus IIC_VecPerm
-// vpkuwum IIC_VecPerm
-// vpkuwus IIC_VecPerm
-// vrefp IIC_VecFPRound
-// vrfim IIC_VecFPRound
-// vrfin IIC_VecFPRound
-// vrfip IIC_VecFPRound
-// vrfiz IIC_VecFPRound
-// vrlb IIC_VecGeneral
-// vrlh IIC_VecGeneral
-// vrlw IIC_VecGeneral
-// vrsqrtefp IIC_VecFP
-// vsel IIC_VecGeneral
-// vsl IIC_VecVSL
-// vslb IIC_VecGeneral
-// vsldoi IIC_VecPerm
-// vslh IIC_VecGeneral
-// vslo IIC_VecPerm
-// vslw IIC_VecGeneral
-// vspltb IIC_VecPerm
-// vsplth IIC_VecPerm
-// vspltisb IIC_VecPerm
-// vspltish IIC_VecPerm
-// vspltisw IIC_VecPerm
-// vspltw IIC_VecPerm
-// vsr IIC_VecVSR
-// vsrab IIC_VecGeneral
-// vsrah IIC_VecGeneral
-// vsraw IIC_VecGeneral
-// vsrb IIC_VecGeneral
-// vsrh IIC_VecGeneral
-// vsro IIC_VecPerm
-// vsrw IIC_VecGeneral
-// vsubcuw IIC_VecGeneral
-// vsubfp IIC_VecFP
-// vsubsbs IIC_VecGeneral
-// vsubshs IIC_VecGeneral
-// vsubsws IIC_VecGeneral
-// vsububm IIC_VecGeneral
-// vsububs IIC_VecGeneral
-// vsubuhm IIC_VecGeneral
-// vsubuhs IIC_VecGeneral
-// vsubuwm IIC_VecGeneral
-// vsubuws IIC_VecGeneral
-// vsum2sws IIC_VecComplex
-// vsum4sbs IIC_VecComplex
-// vsum4shs IIC_VecComplex
-// vsum4ubs IIC_VecComplex
-// vsumsws IIC_VecComplex
-// vupkhpx IIC_VecPerm
-// vupkhsb IIC_VecPerm
-// vupkhsh IIC_VecPerm
-// vupklpx IIC_VecPerm
-// vupklsb IIC_VecPerm
-// vupklsh IIC_VecPerm
-// vxor IIC_VecGeneral
-// xor IIC_IntSimple
-// xori IIC_IntSimple
-// xoris IIC_IntSimple
-//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
index 218fed248a31..04a43bc03251 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
@@ -121,6 +121,14 @@ def PPC440Itineraries : ProcessorItineraries<
[2, 0, 0],
[P440_GPR_Bypass,
P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass, NoBypass]>,
InstrItinData<IIC_IntCompare, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
InstrStage<1, [P440_IRACC, P440_LRACC]>,
InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
index 14476963bad0..21a357a2efcf 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
@@ -29,6 +29,8 @@ def PPCA2Itineraries : ProcessorItineraries<
[1, 0, 0]>,
InstrItinData<IIC_IntGeneral, [InstrStage<1, [A2_XU]>],
[2, 0, 0]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
InstrItinData<IIC_IntCompare, [InstrStage<1, [A2_XU]>],
[2, 0, 0]>,
InstrItinData<IIC_IntDivW, [InstrStage<1, [A2_XU]>],
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
index dab89e3db353..36b8517dabf1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -54,6 +54,12 @@ def PPCE500mcItineraries : ProcessorItineraries<
[4, 1, 1], // Latency = 1
[E500_GPR_Bypass,
E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass,
+ E500_CR_Bypass]>,
InstrItinData<IIC_IntCompare, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
InstrStage<1, [E500_SFX0, E500_SFX1]>],
[5, 1, 1], // Latency = 1 or 2
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
index de097d9d8cf5..7c2693ef0d4f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -58,6 +58,12 @@ def PPCE5500Itineraries : ProcessorItineraries<
[5, 2, 2], // Latency = 1
[E5500_GPR_Bypass,
E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2, 2], // Latency = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass,
+ E5500_CR_Bypass]>,
InstrItinData<IIC_IntCompare, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
[6, 2, 2], // Latency = 1 or 2
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
index 03693cbeada0..635d154d10bf 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
@@ -89,6 +89,10 @@ def P7Itineraries : ProcessorItineraries<
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[1, 1, 1]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2], 0>,
+ InstrStage<1, [P7_BRU]>],
+ [1, 1, 1, 1]>,
InstrItinData<IIC_IntCompare , [InstrStage<1, [P7_DU1, P7_DU2,
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
index 07971809c877..020739baec3a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
@@ -66,6 +66,10 @@ def P8Itineraries : ProcessorItineraries<
InstrStage<1, [P8_FXU1, P8_FXU2, P8_LU1,
P8_LU2, P8_LSU1, P8_LSU2]>],
[1, 1, 1]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
+ InstrStage<1, [P8_BRU]>],
+ [1, 1, 1, 1]>,
InstrItinData<IIC_IntCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index d8ba1c71a0f5..f313b0a6f178 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -14,13 +14,13 @@
#include "PPCSubtarget.h"
#include "PPC.h"
#include "PPCRegisterInfo.h"
+#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <cstdlib>
@@ -36,39 +36,9 @@ using namespace llvm;
static cl::opt<bool> UseSubRegLiveness("ppc-track-subreg-liveness",
cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden);
-/// Return the datalayout string of a subtarget.
-static std::string getDataLayoutString(const Triple &T) {
- bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
- std::string Ret;
-
- // Most PPC* platforms are big endian, PPC64LE is little endian.
- if (T.getArch() == Triple::ppc64le)
- Ret = "e";
- else
- Ret = "E";
-
- Ret += DataLayout::getManglingComponent(T);
-
- // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
- // pointers.
- if (!is64Bit || T.getOS() == Triple::Lv2)
- Ret += "-p:32:32";
-
- // Note, the alignment values for f64 and i64 on ppc64 in Darwin
- // documentation are wrong; these are correct (i.e. "what gcc does").
- if (is64Bit || !T.isOSDarwin())
- Ret += "-i64:64";
- else
- Ret += "-f64:32:64";
-
- // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
- if (is64Bit)
- Ret += "-n32:64";
- else
- Ret += "-n32";
-
- return Ret;
-}
+static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned",
+ cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
+ cl::Hidden);
PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
@@ -80,12 +50,10 @@ PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, const PPCTargetMachine &TM)
: PPCGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
- DL(getDataLayoutString(TargetTriple)),
IsPPC64(TargetTriple.getArch() == Triple::ppc64 ||
TargetTriple.getArch() == Triple::ppc64le),
- TargetABI(PPC_ABI_UNKNOWN),
- FrameLowering(initializeSubtargetDependencies(CPU, FS)), InstrInfo(*this),
- TLInfo(TM), TSInfo(&DL) {}
+ TM(TM), FrameLowering(initializeSubtargetDependencies(CPU, FS)),
+ InstrInfo(*this), TLInfo(TM, *this), TSInfo(TM.getDataLayout()) {}
void PPCSubtarget::initializeEnvironment() {
StackAlignment = 16;
@@ -99,6 +67,8 @@ void PPCSubtarget::initializeEnvironment() {
HasQPX = false;
HasVSX = false;
HasP8Vector = false;
+ HasP8Altivec = false;
+ HasP8Crypto = false;
HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;
@@ -112,6 +82,8 @@ void PPCSubtarget::initializeEnvironment() {
HasFPCVT = false;
HasISEL = false;
HasPOPCNTD = false;
+ HasBPERMD = false;
+ HasExtDiv = false;
HasCMPB = false;
HasLDBRX = false;
IsBookE = false;
@@ -122,18 +94,24 @@ void PPCSubtarget::initializeEnvironment() {
DeprecatedMFTB = false;
DeprecatedDST = false;
HasLazyResolverStubs = false;
+ HasICBT = false;
+ HasInvariantFunctionDescriptors = false;
+ HasPartwordAtomics = false;
+ HasDirectMove = false;
+ IsQPXStackUnaligned = false;
+ HasHTM = false;
}
void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// Determine default and user specified characteristics
std::string CPUName = CPU;
- if (CPUName.empty())
- CPUName = "generic";
-#if (defined(__APPLE__) || defined(__linux__)) && \
- (defined(__ppc__) || defined(__powerpc__))
- if (CPUName == "generic")
- CPUName = sys::getHostCPUName();
-#endif
+ if (CPUName.empty()) {
+ // If cross-compiling with -march=ppc64le without -mcpu
+ if (TargetTriple.getArch() == Triple::ppc64le)
+ CPUName = "ppc64le";
+ else
+ CPUName = "generic";
+ }
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
@@ -153,28 +131,18 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// QPX requires a 32-byte aligned stack. Note that we need to do this if
// we're compiling for a BG/Q system regardless of whether or not QPX
// is enabled because external functions will assume this alignment.
- if (hasQPX() || isBGQ())
- StackAlignment = 32;
+ IsQPXStackUnaligned = QPXStackUnaligned;
+ StackAlignment = getPlatformStackAlignment();
// Determine endianness.
+ // FIXME: Part of the TargetMachine.
IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le);
-
- // Determine default ABI.
- if (TargetABI == PPC_ABI_UNKNOWN) {
- if (!isDarwin() && IsPPC64) {
- if (IsLittleEndian)
- TargetABI = PPC_ABI_ELFv2;
- else
- TargetABI = PPC_ABI_ELFv1;
- }
- }
}
/// hasLazyResolverStub - Return true if accesses to the specified global have
/// to go through a dyld lazy resolution stub. This means that an extra load
/// is required to get the address of the global.
-bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
- const TargetMachine &TM) const {
+bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const {
// We never have stubs if HasLazyResolverStubs=false or if in static mode.
if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
return false;
@@ -242,3 +210,5 @@ bool PPCSubtarget::enableSubRegLiveness() const {
return UseSubRegLiveness;
}
+bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); }
+bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
index a1b644d26858..8d955088634a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -68,9 +68,6 @@ protected:
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
- // Calculates type size & alignment
- const DataLayout DL;
-
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned StackAlignment;
@@ -92,6 +89,8 @@ protected:
bool HasQPX;
bool HasVSX;
bool HasP8Vector;
+ bool HasP8Altivec;
+ bool HasP8Crypto;
bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -102,6 +101,8 @@ protected:
bool HasFPCVT;
bool HasISEL;
bool HasPOPCNTD;
+ bool HasBPERMD;
+ bool HasExtDiv;
bool HasCMPB;
bool HasLDBRX;
bool IsBookE;
@@ -113,13 +114,18 @@ protected:
bool DeprecatedDST;
bool HasLazyResolverStubs;
bool IsLittleEndian;
-
- enum {
- PPC_ABI_UNKNOWN,
- PPC_ABI_ELFv1,
- PPC_ABI_ELFv2
- } TargetABI;
-
+ bool HasICBT;
+ bool HasInvariantFunctionDescriptors;
+ bool HasPartwordAtomics;
+ bool HasDirectMove;
+ bool HasHTM;
+
+ /// When targeting QPX running a stock PPC64 Linux kernel where the stack
+ /// alignment has not been changed, we need to keep the 16-byte alignment
+ /// of the stack.
+ bool IsQPXStackUnaligned;
+
+ const PPCTargetMachine &TM;
PPCFrameLowering FrameLowering;
PPCInstrInfo InstrInfo;
PPCTargetLowering TLInfo;
@@ -154,7 +160,6 @@ public:
const PPCFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
- const DataLayout *getDataLayout() const override { return &DL; }
const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const PPCTargetLowering *getTargetLowering() const override {
return &TLInfo;
@@ -165,6 +170,7 @@ public:
const PPCRegisterInfo *getRegisterInfo() const override {
return &getInstrInfo()->getRegisterInfo();
}
+ const PPCTargetMachine &getTargetMachine() const { return TM; }
/// initializeSubtargetDependencies - Initializes using a CPU and feature string
/// so that we can use initializer lists for subtarget initialization.
@@ -177,7 +183,7 @@ private:
public:
/// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
///
- bool isPPC64() const { return IsPPC64; }
+ bool isPPC64() const;
/// has64BitSupport - Return true if the selected CPU supports 64-bit
/// instructions, regardless of whether we are in 32-bit or 64-bit mode.
@@ -195,8 +201,7 @@ public:
/// hasLazyResolverStub - Return true if accesses to the specified global have
/// to go through a dyld lazy resolution stub. This means that an extra load
/// is required to get the address of the global.
- bool hasLazyResolverStub(const GlobalValue *GV,
- const TargetMachine &TM) const;
+ bool hasLazyResolverStub(const GlobalValue *GV) const;
// isLittleEndian - True if generating little-endian code
bool isLittleEndian() const { return IsLittleEndian; }
@@ -218,9 +223,13 @@ public:
bool hasQPX() const { return HasQPX; }
bool hasVSX() const { return HasVSX; }
bool hasP8Vector() const { return HasP8Vector; }
+ bool hasP8Altivec() const { return HasP8Altivec; }
+ bool hasP8Crypto() const { return HasP8Crypto; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
+ bool hasBPERMD() const { return HasBPERMD; }
+ bool hasExtDiv() const { return HasExtDiv; }
bool hasCMPB() const { return HasCMPB; }
bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
@@ -230,6 +239,21 @@ public:
bool isE500() const { return IsE500; }
bool isDeprecatedMFTB() const { return DeprecatedMFTB; }
bool isDeprecatedDST() const { return DeprecatedDST; }
+ bool hasICBT() const { return HasICBT; }
+ bool hasInvariantFunctionDescriptors() const {
+ return HasInvariantFunctionDescriptors;
+ }
+ bool hasPartwordAtomics() const { return HasPartwordAtomics; }
+ bool hasDirectMove() const { return HasDirectMove; }
+
+ bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
+ unsigned getPlatformStackAlignment() const {
+ if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned())
+ return 32;
+
+ return 16;
+ }
+ bool hasHTM() const { return HasHTM; }
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -241,9 +265,9 @@ public:
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
- bool isDarwinABI() const { return isDarwin(); }
- bool isSVR4ABI() const { return !isDarwin(); }
- bool isELFv2ABI() const { return TargetABI == PPC_ABI_ELFv2; }
+ bool isDarwinABI() const { return isTargetMachO() || isDarwin(); }
+ bool isSVR4ABI() const { return !isDarwinABI(); }
+ bool isELFv2ABI() const;
bool enableEarlyIfConversion() const override { return hasISEL(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
new file mode 100644
index 000000000000..2dc0d825c80d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -0,0 +1,170 @@
+//===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands ADDItls{ld,gd}LADDR[32] machine instructions into
+// separate ADDItls[gd]L[32] and GETtlsADDR[32] instructions, both of
+// which define GPR3. A copy is added from GPR3 to the target virtual
+// register of the original instruction. The GETtlsADDR[32] is really
+// a call instruction, so its target register is constrained to be GPR3.
+// This is not true of ADDItls[gd]L[32], but there is a legacy linker
+// optimization bug that requires the target register of the addi of
+// a local- or general-dynamic TLS access sequence to be GPR3.
+//
+// This is done in a late pass so that TLS variable accesses can be
+// fully commoned by MachineCSE.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-tls-dynamic-call"
+
+namespace llvm {
+ void initializePPCTLSDynamicCallPass(PassRegistry&);
+}
+
+namespace {
+ struct PPCTLSDynamicCall : public MachineFunctionPass {
+ static char ID;
+ PPCTLSDynamicCall() : MachineFunctionPass(ID) {
+ initializePPCTLSDynamicCallPass(*PassRegistry::getPassRegistry());
+ }
+
+ const PPCInstrInfo *TII;
+ LiveIntervals *LIS;
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE;) {
+ MachineInstr *MI = I;
+
+ if (MI->getOpcode() != PPC::ADDItlsgdLADDR &&
+ MI->getOpcode() != PPC::ADDItlsldLADDR &&
+ MI->getOpcode() != PPC::ADDItlsgdLADDR32 &&
+ MI->getOpcode() != PPC::ADDItlsldLADDR32) {
+ ++I;
+ continue;
+ }
+
+ DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << *MI;);
+
+ unsigned OutReg = MI->getOperand(0).getReg();
+ unsigned InReg = MI->getOperand(1).getReg();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
+ unsigned Opc1, Opc2;
+ SmallVector<unsigned, 4> OrigRegs;
+ OrigRegs.push_back(OutReg);
+ OrigRegs.push_back(InReg);
+ OrigRegs.push_back(GPR3);
+
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Opcode inconsistency error");
+ case PPC::ADDItlsgdLADDR:
+ Opc1 = PPC::ADDItlsgdL;
+ Opc2 = PPC::GETtlsADDR;
+ break;
+ case PPC::ADDItlsldLADDR:
+ Opc1 = PPC::ADDItlsldL;
+ Opc2 = PPC::GETtlsldADDR;
+ break;
+ case PPC::ADDItlsgdLADDR32:
+ Opc1 = PPC::ADDItlsgdL32;
+ Opc2 = PPC::GETtlsADDR32;
+ break;
+ case PPC::ADDItlsldLADDR32:
+ Opc1 = PPC::ADDItlsldL32;
+ Opc2 = PPC::GETtlsldADDR32;
+ break;
+ }
+
+ // Expand into two ops built prior to the existing instruction.
+ MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3)
+ .addReg(InReg);
+ Addi->addOperand(MI->getOperand(2));
+
+ // The ADDItls* instruction is the first instruction in the
+ // repair range.
+ MachineBasicBlock::iterator First = I;
+ --First;
+
+ MachineInstr *Call = (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
+ .addReg(GPR3));
+ Call->addOperand(MI->getOperand(3));
+
+ BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg)
+ .addReg(GPR3);
+
+ // The COPY is the last instruction in the repair range.
+ MachineBasicBlock::iterator Last = I;
+ --Last;
+
+ // Move past the original instruction and remove it.
+ ++I;
+ MI->removeFromParent();
+
+ // Repair the live intervals.
+ LIS->repairIntervalsInRange(&MBB, First, Last, OrigRegs);
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE,
+ "PowerPC TLS Dynamic Call Fixup", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(PPCTLSDynamicCall, DEBUG_TYPE,
+ "PowerPC TLS Dynamic Call Fixup", false, false)
+
+char PPCTLSDynamicCall::ID = 0;
+FunctionPass*
+llvm::createPPCTLSDynamicCallPass() { return new PPCTLSDynamicCall(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
new file mode 100644
index 000000000000..bf165c9edc6e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -0,0 +1,156 @@
+//===-- PPCTOCRegDeps.cpp - Add Extra TOC Register Dependencies -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// When resolving an address using the ELF ABI TOC pointer, two relocations are
+// generally required: one for the high part and one for the low part. Only
+// the high part generally explicitly depends on r2 (the TOC pointer). And, so,
+// we might produce code like this:
+//
+// .Ltmp526:
+// addis 3, 2, .LC12@toc@ha
+// .Ltmp1628:
+// std 2, 40(1)
+// ld 5, 0(27)
+// ld 2, 8(27)
+// ld 11, 16(27)
+// ld 3, .LC12@toc@l(3)
+// rldicl 4, 4, 0, 32
+// mtctr 5
+// bctrl
+// ld 2, 40(1)
+//
+// And there is nothing wrong with this code, as such, but there is a linker bug
+// in binutils (https://sourceware.org/bugzilla/show_bug.cgi?id=18414) that will
+// misoptimize this code sequence to this:
+// nop
+// std r2,40(r1)
+// ld r5,0(r27)
+// ld r2,8(r27)
+// ld r11,16(r27)
+// ld r3,-32472(r2)
+// clrldi r4,r4,32
+// mtctr r5
+// bctrl
+// ld r2,40(r1)
+// because the linker does not know (and does not check) that the value in r2
+// changed in between the instruction using the .LC12@toc@ha (TOC-relative)
+// relocation and the instruction using the .LC12@toc@l(3) relocation.
+// Because it finds these instructions using the relocations (and not by
+// scanning the instructions), it has been asserted that there is no good way
+// to detect the change of r2 in between. As a result, this bug may never be
+// fixed (i.e. it may become part of the definition of the ABI). GCC was
+// updated to add extra dependencies on r2 to instructions using the @toc@l
+// relocations to avoid this problem, and we'll do the same here.
+//
+// This is done as a separate pass because:
+// 1. These extra r2 dependencies are not really properties of the
+// instructions, but rather due to a linker bug, and maybe one day we'll be
+// able to get rid of them when targeting linkers without this bug (and,
+// thus, keeping the logic centralized here will make that
+// straightforward).
+// 2. There are ISel-level peephole optimizations that propagate the @toc@l
+// relocations to some user instructions, and so the exta dependencies do
+// not apply only to a fixed set of instructions (without undesirable
+// definition replication).
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-toc-reg-deps"
+
+namespace llvm {
+ void initializePPCTOCRegDepsPass(PassRegistry&);
+}
+
+namespace {
+ // PPCTOCRegDeps pass - For simple functions without epilogue code, move
+ // returns up, and create conditional returns, to avoid unnecessary
+ // branch-to-blr sequences.
+ struct PPCTOCRegDeps : public MachineFunctionPass {
+ static char ID;
+ PPCTOCRegDeps() : MachineFunctionPass(ID) {
+ initializePPCTOCRegDepsPass(*PassRegistry::getPassRegistry());
+ }
+
+protected:
+ bool hasTOCLoReloc(const MachineInstr &MI) {
+ if (MI.getOpcode() == PPC::LDtocL ||
+ MI.getOpcode() == PPC::ADDItocL)
+ return true;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if ((MO.getTargetFlags() & PPCII::MO_ACCESS_MASK) == PPCII::MO_TOC_LO)
+ return true;
+ }
+
+ return false;
+ }
+
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ for (auto &MI : MBB) {
+ if (!hasTOCLoReloc(MI))
+ continue;
+
+ MI.addOperand(MachineOperand::CreateReg(PPC::X2,
+ false /*IsDef*/,
+ true /*IsImp*/));
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCTOCRegDeps, DEBUG_TYPE,
+ "PowerPC TOC Register Dependencies", false, false)
+
+char PPCTOCRegDeps::ID = 0;
+FunctionPass*
+llvm::createPPCTOCRegDepsPass() { return new PPCTOCRegDeps(); }
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index e07fd0575b50..50d4395dfbe8 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -14,10 +14,11 @@
#include "PPCTargetMachine.h"
#include "PPC.h"
#include "PPCTargetObjectFile.h"
+#include "PPCTargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
@@ -29,15 +30,33 @@ static cl::
opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
cl::desc("Disable CTR loops for PPC"));
+static cl::
+opt<bool> DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden,
+ cl::desc("Disable PPC loop preinc prep"));
+
static cl::opt<bool>
VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early"));
+static cl::
+opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
+ cl::desc("Disable VSX Swap Removal for PPC"));
+
static cl::opt<bool>
EnableGEPOpt("ppc-gep-opt", cl::Hidden,
cl::desc("Enable optimizations on complex GEPs"),
cl::init(true));
+static cl::opt<bool>
+EnablePrefetch("enable-ppc-prefetching",
+ cl::desc("disable software prefetching on PPC"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps",
+ cl::desc("Add extra TOC register dependencies"),
+ cl::init(true), cl::Hidden);
+
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
@@ -45,6 +64,40 @@ extern "C" void LLVMInitializePowerPCTarget() {
RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
}
+/// Return the datalayout string of a subtarget.
+static std::string getDataLayoutString(const Triple &T) {
+ bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
+ std::string Ret;
+
+ // Most PPC* platforms are big endian, PPC64LE is little endian.
+ if (T.getArch() == Triple::ppc64le)
+ Ret = "e";
+ else
+ Ret = "E";
+
+ Ret += DataLayout::getManglingComponent(T);
+
+ // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
+ // pointers.
+ if (!is64Bit || T.getOS() == Triple::Lv2)
+ Ret += "-p:32:32";
+
+ // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+ // documentation are wrong; these are correct (i.e. "what gcc does").
+ if (is64Bit || !T.isOSDarwin())
+ Ret += "-i64:64";
+ else
+ Ret += "-f64:32:64";
+
+ // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
+ if (is64Bit)
+ Ret += "-n32:64";
+ else
+ Ret += "-n32";
+
+ return Ret;
+}
+
static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, StringRef TT) {
std::string FullFS = FS;
Triple TargetTriple(TT);
@@ -64,6 +117,14 @@ static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, String
else
FullFS = "+crbits";
}
+
+ if (OL != CodeGenOpt::None) {
+ if (!FullFS.empty())
+ FullFS = "+invariant-function-descriptors," + FullFS;
+ else
+ FullFS = "+invariant-function-descriptors";
+ }
+
return FullFS;
}
@@ -76,6 +137,30 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return make_unique<PPC64LinuxTargetObjectFile>();
}
+static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
+ const TargetOptions &Options) {
+ if (Options.MCOptions.getABIName().startswith("elfv1"))
+ return PPCTargetMachine::PPC_ABI_ELFv1;
+ else if (Options.MCOptions.getABIName().startswith("elfv2"))
+ return PPCTargetMachine::PPC_ABI_ELFv2;
+
+ assert(Options.MCOptions.getABIName().empty() &&
+ "Unknown target-abi option!");
+
+ if (!TT.isMacOSX()) {
+ switch (TT.getArch()) {
+ case Triple::ppc64le:
+ return PPCTargetMachine::PPC_ABI_ELFv2;
+ case Triple::ppc64:
+ return PPCTargetMachine::PPC_ABI_ELFv1;
+ default:
+ // Fallthrough.
+ ;
+ }
+ }
+ return PPCTargetMachine::PPC_ABI_UNKNOWN;
+}
+
// The FeatureString here is a little subtle. We are modifying the feature string
// with what are (currently) non-function specific overrides as it goes into the
// LLVMTargetMachine constructor and then using the stored value in the
@@ -84,10 +169,10 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, computeFSAdditions(FS, OL, TT), Options, RM,
- CM, OL),
+ : LLVMTargetMachine(T, getDataLayoutString(Triple(TT)), TT, CPU,
+ computeFSAdditions(FS, OL, TT), Options, RM, CM, OL),
TLOF(createTLOF(Triple(getTargetTriple()))),
- Subtarget(TT, CPU, TargetFS, *this) {
+ TargetABI(computeTargetABI(Triple(TT), Options)) {
initAsmInfo();
}
@@ -115,11 +200,8 @@ PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
const PPCSubtarget *
PPCTargetMachine::getSubtargetImpl(const Function &F) const {
- AttributeSet FnAttrs = F.getAttributes();
- Attribute CPUAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
- Attribute FSAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
? CPUAttr.getValueAsString().str()
@@ -134,7 +216,15 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<PPCSubtarget>(TargetTriple, CPU, FS, *this);
+ I = llvm::make_unique<PPCSubtarget>(
+ TargetTriple, CPU,
+ // FIXME: It would be good to have the subtarget additions here
+ // not necessary. Anything that turns them on/off (overrides) ends
+ // up being put at the end of the feature string, but the defaults
+ // shouldn't require adding them. Fixing this means pulling Feature64Bit
+ // out of most of the target cpus in the .td file and making it set only
+ // as part of initialization via the TargetTriple.
+ computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this);
}
return I.get();
}
@@ -154,14 +244,11 @@ public:
return getTM<PPCTargetMachine>();
}
- const PPCSubtarget &getPPCSubtarget() const {
- return *getPPCTargetMachine().getSubtargetImpl();
- }
-
void addIRPasses() override;
bool addPreISel() override;
bool addILPOpts() override;
bool addInstSelector() override;
+ void addMachineSSAOptimization() override;
void addPreRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
@@ -175,6 +262,16 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
void PPCPassConfig::addIRPasses() {
addPass(createAtomicExpandPass(&getPPCTargetMachine()));
+ // For the BG/Q (or if explicitly requested), add explicit data prefetch
+ // intrinsics.
+ bool UsePrefetching =
+ Triple(TM->getTargetTriple()).getVendor() == Triple::BGQ &&
+ getOptLevel() != CodeGenOpt::None;
+ if (EnablePrefetch.getNumOccurrences() > 0)
+ UsePrefetching = EnablePrefetch;
+ if (UsePrefetching)
+ addPass(createPPCLoopDataPrefetchPass());
+
if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
// Call SeparateConstOffsetFromGEP pass to extract constants within indices
// and lower a GEP with multiple indices to either arithmetic operations or
@@ -192,6 +289,9 @@ void PPCPassConfig::addIRPasses() {
}
bool PPCPassConfig::addPreISel() {
+ if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
+
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
addPass(createPPCCTRLoops(getPPCTargetMachine()));
@@ -216,15 +316,26 @@ bool PPCPassConfig::addInstSelector() {
return false;
}
+void PPCPassConfig::addMachineSSAOptimization() {
+ TargetPassConfig::addMachineSSAOptimization();
+ // For little endian, remove where possible the vector swap instructions
+ // introduced at code generation to normalize vector element order.
+ if (Triple(TM->getTargetTriple()).getArch() == Triple::ppc64le &&
+ !DisableVSXSwapRemoval)
+ addPass(createPPCVSXSwapRemovalPass());
+}
+
void PPCPassConfig::addPreRegAlloc() {
initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
&PPCVSXFMAMutateID);
+ if (getPPCTargetMachine().getRelocationModel() == Reloc::PIC_)
+ addPass(createPPCTLSDynamicCallPass());
+ if (EnableExtraTOCRegDeps)
+ addPass(createPPCTOCRegDepsPass());
}
void PPCPassConfig::addPreSched2() {
- addPass(createPPCVSXCopyCleanupPass(), false);
-
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
}
@@ -236,11 +347,7 @@ void PPCPassConfig::addPreEmitPass() {
addPass(createPPCBranchSelectionPass(), false);
}
-void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- // Add first the target-independent BasicTTI pass, then our PPC pass. This
- // allows the PPC pass to delegate to the target independent layer when
- // appropriate.
- PM.add(createBasicTargetTransformInfoPass(this));
- PM.add(createPPCTargetTransformInfoPass(this));
+TargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis(
+ [this](Function &F) { return TargetTransformInfo(PPCTTIImpl(this, F)); });
}
-
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index 5095d736a651..7a4905889891 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -24,30 +24,35 @@ namespace llvm {
/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
///
class PPCTargetMachine : public LLVMTargetMachine {
+public:
+ enum PPCABI { PPC_ABI_UNKNOWN, PPC_ABI_ELFv1, PPC_ABI_ELFv2 };
+private:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- PPCSubtarget Subtarget;
-
+ PPCABI TargetABI;
mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
public:
- PPCTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
+ PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL);
~PPCTargetMachine() override;
- const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
const PPCSubtarget *getSubtargetImpl(const Function &F) const override;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- /// \brief Register PPC analysis passes with a pass manager.
- void addAnalysisPasses(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
+
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
+ bool isELFv2ABI() const { return TargetABI == PPC_ABI_ELFv2; }
+ bool isPPC64() const {
+ Triple TT(getTargetTriple());
+ return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le);
+ };
};
/// PPC32TargetMachine - PowerPC 32-bit target machine.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
index 2903cc192aa8..9ad134070082 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -22,7 +22,7 @@ Initialize(MCContext &Ctx, const TargetMachine &TM) {
InitializeELF(TM.Options.UseInitArray);
}
-const MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal(
+MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal(
const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const {
// Here override ReadOnlySection to DataRelROSection for PPC64 SVR4 ABI
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
index cd84da222751..d248791f2cad 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
@@ -22,9 +22,9 @@ namespace llvm {
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
- const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const override;
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
/// \brief Describe a TLS variable address within debug info.
const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
index 6493713bfbad..8aaf5e188907 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -16,7 +16,7 @@ namespace llvm {
class PPCTargetStreamer : public MCTargetStreamer {
public:
PPCTargetStreamer(MCStreamer &S);
- virtual ~PPCTargetStreamer();
+ ~PPCTargetStreamer() override;
virtual void emitTCEntry(const MCSymbol &S) = 0;
virtual void emitMachine(StringRef CPU) = 0;
virtual void emitAbiVersion(int AbiVersion) = 0;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index a7bb2526d9d4..25d563a7d975 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1,4 +1,4 @@
-//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
+//===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,17 +6,10 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-/// \file
-/// This file implements a TargetTransformInfo analysis pass specific to the
-/// PPC target machine. It uses the target's detailed information to provide
-/// more precise answers to certain TTI queries, while letting the target
-/// independent and default TTI implementations handle the rest.
-///
-//===----------------------------------------------------------------------===//
-#include "PPC.h"
-#include "PPCTargetMachine.h"
+#include "PPCTargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
@@ -28,115 +21,23 @@ using namespace llvm;
static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
-// Declare the pass initialization routine locally as target-specific passes
-// don't have a target-wide initialization entry point, and so we rely on the
-// pass constructor initialization.
-namespace llvm {
-void initializePPCTTIPass(PassRegistry &);
-}
-
-namespace {
-
-class PPCTTI final : public ImmutablePass, public TargetTransformInfo {
- const TargetMachine *TM;
- const PPCSubtarget *ST;
- const PPCTargetLowering *TLI;
-
-public:
- PPCTTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) {
- llvm_unreachable("This pass cannot be directly constructed");
- }
-
- PPCTTI(const PPCTargetMachine *TM)
- : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
- TLI(TM->getSubtargetImpl()->getTargetLowering()) {
- initializePPCTTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override {
- pushTTIStack(this);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- TargetTransformInfo::getAnalysisUsage(AU);
- }
-
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo*)this;
- return this;
- }
-
- /// \name Scalar TTI Implementations
- /// @{
- unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
-
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) const override;
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) const override;
-
- PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
- void getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const override;
-
- /// @}
-
- /// \name Vector TTI Implementations
- /// @{
-
- unsigned getNumberOfRegisters(bool Vector) const override;
- unsigned getRegisterBitWidth(bool Vector) const override;
- unsigned getMaxInterleaveFactor() const override;
- unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
- OperandValueKind, OperandValueProperties,
- OperandValueProperties) const override;
- unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
- int Index, Type *SubTp) const override;
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const override;
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const override;
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const override;
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const override;
-
- /// @}
-};
-
-} // end anonymous namespace
-
-INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
- "PPC Target Transform Info", true, true, false)
-char PPCTTI::ID = 0;
-
-ImmutablePass *
-llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
- return new PPCTTI(TM);
-}
-
-
//===----------------------------------------------------------------------===//
//
// PPC cost model.
//
//===----------------------------------------------------------------------===//
-PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
+TargetTransformInfo::PopcntSupportKind
+PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
if (ST->hasPOPCNTD() && TyWidth <= 64)
- return PSK_FastHardware;
- return PSK_Software;
+ return TTI::PSK_FastHardware;
+ return TTI::PSK_Software;
}
-unsigned PPCTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+unsigned PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
if (DisablePPCConstHoist)
- return TargetTransformInfo::getIntImmCost(Imm, Ty);
+ return BaseT::getIntImmCost(Imm, Ty);
assert(Ty->isIntegerTy());
@@ -145,28 +46,28 @@ unsigned PPCTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
return ~0U;
if (Imm == 0)
- return TCC_Free;
+ return TTI::TCC_Free;
if (Imm.getBitWidth() <= 64) {
if (isInt<16>(Imm.getSExtValue()))
- return TCC_Basic;
+ return TTI::TCC_Basic;
if (isInt<32>(Imm.getSExtValue())) {
// A constant that can be materialized using lis.
if ((Imm.getZExtValue() & 0xFFFF) == 0)
- return TCC_Basic;
+ return TTI::TCC_Basic;
- return 2 * TCC_Basic;
+ return 2 * TTI::TCC_Basic;
}
}
- return 4 * TCC_Basic;
+ return 4 * TTI::TCC_Basic;
}
-unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) const {
+unsigned PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
if (DisablePPCConstHoist)
- return TargetTransformInfo::getIntImmCost(IID, Idx, Imm, Ty);
+ return BaseT::getIntImmCost(IID, Idx, Imm, Ty);
assert(Ty->isIntegerTy());
@@ -175,31 +76,32 @@ unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
return ~0U;
switch (IID) {
- default: return TCC_Free;
+ default:
+ return TTI::TCC_Free;
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::usub_with_overflow:
if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
- return TCC_Free;
+ return TTI::TCC_Free;
break;
case Intrinsic::experimental_stackmap:
if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
- return TCC_Free;
+ return TTI::TCC_Free;
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
- return TCC_Free;
+ return TTI::TCC_Free;
break;
}
- return PPCTTI::getIntImmCost(Imm, Ty);
+ return PPCTTIImpl::getIntImmCost(Imm, Ty);
}
-unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) const {
+unsigned PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
if (DisablePPCConstHoist)
- return TargetTransformInfo::getIntImmCost(Opcode, Idx, Imm, Ty);
+ return BaseT::getIntImmCost(Opcode, Idx, Imm, Ty);
assert(Ty->isIntegerTy());
@@ -211,14 +113,15 @@ unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
ZeroFree = false;
switch (Opcode) {
- default: return TCC_Free;
+ default:
+ return TTI::TCC_Free;
case Instruction::GetElementPtr:
// Always hoist the base address of a GetElementPtr. This prevents the
// creation of new constants for every base constant that gets constant
// folded with the offset.
if (Idx == 0)
- return 2 * TCC_Basic;
- return TCC_Free;
+ return 2 * TTI::TCC_Basic;
+ return TTI::TCC_Free;
case Instruction::And:
RunFree = true; // (for the rotate-and-mask instructions)
// Fallthrough...
@@ -250,54 +153,62 @@ unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
}
if (ZeroFree && Imm == 0)
- return TCC_Free;
+ return TTI::TCC_Free;
if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
if (isInt<16>(Imm.getSExtValue()))
- return TCC_Free;
+ return TTI::TCC_Free;
if (RunFree) {
if (Imm.getBitWidth() <= 32 &&
(isShiftedMask_32(Imm.getZExtValue()) ||
isShiftedMask_32(~Imm.getZExtValue())))
- return TCC_Free;
-
+ return TTI::TCC_Free;
if (ST->isPPC64() &&
(isShiftedMask_64(Imm.getZExtValue()) ||
isShiftedMask_64(~Imm.getZExtValue())))
- return TCC_Free;
+ return TTI::TCC_Free;
}
if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
- return TCC_Free;
+ return TTI::TCC_Free;
if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
- return TCC_Free;
+ return TTI::TCC_Free;
}
- return PPCTTI::getIntImmCost(Imm, Ty);
+ return PPCTTIImpl::getIntImmCost(Imm, Ty);
}
-void PPCTTI::getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const {
- if (TM->getSubtarget<PPCSubtarget>(F).getDarwinDirective() == PPC::DIR_A2) {
+void PPCTTIImpl::getUnrollingPreferences(Loop *L,
+ TTI::UnrollingPreferences &UP) {
+ if (ST->getDarwinDirective() == PPC::DIR_A2) {
// The A2 is in-order with a deep pipeline, and concatenation unrolling
// helps expose latency-hiding opportunities to the instruction scheduler.
UP.Partial = UP.Runtime = true;
+
+ // We unroll a lot on the A2 (hundreds of instructions), and the benefits
+ // often outweigh the cost of a division to compute the trip count.
+ UP.AllowExpensiveTripCount = true;
}
- TargetTransformInfo::getUnrollingPreferences(F, L, UP);
+ BaseT::getUnrollingPreferences(L, UP);
}
-unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
- if (Vector && !ST->hasAltivec())
+bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
+ return LoopHasReductions;
+}
+
+unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
+ if (Vector && !ST->hasAltivec() && !ST->hasQPX())
return 0;
return ST->hasVSX() ? 64 : 32;
}
-unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
+unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) {
if (Vector) {
+ if (ST->hasQPX()) return 256;
if (ST->hasAltivec()) return 128;
return 0;
}
@@ -308,7 +219,7 @@ unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
}
-unsigned PPCTTI::getMaxInterleaveFactor() const {
+unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
unsigned Directive = ST->getDarwinDirective();
// The 440 has no SIMD support, but floating-point instructions
// have a 5-cycle latency, so unroll by 5x for latency hiding.
@@ -324,40 +235,46 @@ unsigned PPCTTI::getMaxInterleaveFactor() const {
if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
return 1;
+ // For P7 and P8, floating-point instructions have a 6-cycle latency and
+ // there are two execution units, so unroll by 12x for latency hiding.
+ if (Directive == PPC::DIR_PWR7 ||
+ Directive == PPC::DIR_PWR8)
+ return 12;
+
// For most things, modern systems have two execution units (and
// out-of-order execution).
return 2;
}
-unsigned PPCTTI::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
- OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo) const {
+unsigned PPCTTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
+ TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
// Fallback to the default implementation.
- return TargetTransformInfo::getArithmeticInstrCost(
- Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo);
}
-unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) const {
- return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+unsigned PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
-unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
+unsigned PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
- return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const {
- return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+unsigned PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) {
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const {
+unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) {
assert(Val->isVectorTy() && "This must be a vector type");
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -368,7 +285,13 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
if (Index == 0)
return 0;
- return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+ return BaseT::getVectorInstrCost(Opcode, Val, Index);
+ } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
+ // Floating point scalars are already located in index #0.
+ if (Index == 0)
+ return 0;
+
+ return BaseT::getVectorInstrCost(Opcode, Val, Index);
}
// Estimated cost of a load-hit-store delay. This was obtained
@@ -385,21 +308,20 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
// these need to be estimated as very costly.
if (ISD == ISD::EXTRACT_VECTOR_ELT ||
ISD == ISD::INSERT_VECTOR_ELT)
- return LHSPenalty +
- TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+ return LHSPenalty + BaseT::getVectorInstrCost(Opcode, Val, Index);
- return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+ return BaseT::getVectorInstrCost(Opcode, Val, Index);
}
-unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const {
+unsigned PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");
- unsigned Cost =
- TargetTransformInfo::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ unsigned Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
// VSX loads/stores support unaligned access.
if (ST->hasVSX()) {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
new file mode 100644
index 000000000000..35e7a1497c83
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -0,0 +1,104 @@
+//===-- PPCTargetTransformInfo.h - PPC specific TTI -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file a TargetTransformInfo::Concept conforming object specific to the
+/// PPC target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H
+
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
+ typedef BasicTTIImplBase<PPCTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const PPCSubtarget *ST;
+ const PPCTargetLowering *TLI;
+
+ const PPCSubtarget *getST() const { return ST; }
+ const PPCTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit PPCTTIImpl(const PPCTargetMachine *TM, Function &F)
+ : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ PPCTTIImpl(const PPCTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ PPCTTIImpl(PPCTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+ PPCTTIImpl &operator=(const PPCTTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ PPCTTIImpl &operator=(PPCTTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ using BaseT::getIntImmCost;
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+
+ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ bool enableAggressiveInterleaving(bool LoopHasReductions);
+ unsigned getNumberOfRegisters(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector);
+ unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+ unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp);
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
new file mode 100644
index 000000000000..5e3ae2a4471b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -0,0 +1,176 @@
+//===-------------- PPCVSXCopy.cpp - VSX Copy Legalization ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass which deals with the complexity of generating legal VSX register
+// copies to/from register classes which partially overlap with the VSX
+// register file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCHazardRecognizers.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-vsx-copy"
+
+namespace llvm {
+ void initializePPCVSXCopyPass(PassRegistry&);
+}
+
+namespace {
+ // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
+ // (Altivec and scalar floating-point registers), we need to transform the
+ // copies into subregister copies with other restrictions.
+ struct PPCVSXCopy : public MachineFunctionPass {
+ static char ID;
+ PPCVSXCopy() : MachineFunctionPass(ID) {
+ initializePPCVSXCopyPass(*PassRegistry::getPassRegistry());
+ }
+
+ const TargetInstrInfo *TII;
+
+ bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
+ MachineRegisterInfo &MRI) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ return RC->hasSubClassEq(MRI.getRegClass(Reg));
+ } else if (RC->contains(Reg)) {
+ return true;
+ }
+
+ return false;
+ }
+
+ bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI);
+ }
+
+ bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI);
+ }
+
+ bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
+ }
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+ if (!MI->isFullCopy())
+ continue;
+
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
+
+ if ( IsVSReg(DstMO.getReg(), MRI) &&
+ !IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *to* a VSX register from a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *SrcRC =
+ IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
+ &PPC::VSLRCRegClass;
+ assert((IsF8Reg(SrcMO.getReg(), MRI) ||
+ IsVRReg(SrcMO.getReg(), MRI)) &&
+ "Unknown source for a VSX copy");
+
+ unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
+ .addImm(1) // add 1, not 0, because there is no implicit clearing
+ // of the high bits.
+ .addOperand(SrcMO)
+ .addImm(IsVRReg(SrcMO.getReg(), MRI) ? PPC::sub_128 :
+ PPC::sub_64);
+
+ // The source of the original copy is now the new virtual register.
+ SrcMO.setReg(NewVReg);
+ } else if (!IsVSReg(DstMO.getReg(), MRI) &&
+ IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *from* a VSX register to a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *DstRC =
+ IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
+ &PPC::VSLRCRegClass;
+ assert((IsF8Reg(DstMO.getReg(), MRI) ||
+ IsVRReg(DstMO.getReg(), MRI)) &&
+ "Unknown destination for a VSX copy");
+
+ // Copy the VSX value into a new VSX register of the correct subclass.
+ unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg)
+ .addOperand(SrcMO);
+
+ // Transform the original copy into a subregister extraction copy.
+ SrcMO.setReg(NewVReg);
+ SrcMO.setSubReg(IsVRReg(DstMO.getReg(), MRI) ? PPC::sub_128 :
+ PPC::sub_64);
+ }
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // If we don't have VSX on the subtarget, don't do anything.
+ const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
+ if (!STI.hasVSX())
+ return false;
+ TII = STI.getInstrInfo();
+
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
+ "PowerPC VSX Copy Legalization", false, false)
+
+char PPCVSXCopy::ID = 0;
+FunctionPass*
+llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
new file mode 100644
index 000000000000..f352fa647ace
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -0,0 +1,335 @@
+//===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass mutates the form of VSX FMA instructions to avoid unnecessary
+// copies.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation",
+cl::desc("Disable VSX FMA instruction mutation"), cl::Hidden);
+
+#define DEBUG_TYPE "ppc-vsx-fma-mutate"
+
+namespace llvm { namespace PPC {
+ int getAltVSXFMAOpcode(uint16_t Opcode);
+} }
+
+namespace {
+ // PPCVSXFMAMutate pass - For copies between VSX registers and non-VSX registers
+ // (Altivec and scalar floating-point registers), we need to transform the
+ // copies into subregister copies with other restrictions.
+ struct PPCVSXFMAMutate : public MachineFunctionPass {
+ static char ID;
+ PPCVSXFMAMutate() : MachineFunctionPass(ID) {
+ initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
+ }
+
+ LiveIntervals *LIS;
+ const PPCInstrInfo *TII;
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+
+ // The default (A-type) VSX FMA form kills the addend (it is taken from
+ // the target register, which is then updated to reflect the result of
+ // the FMA). If the instruction, however, kills one of the registers
+ // used for the product, then we can use the M-form instruction (which
+ // will take that value from the to-be-defined register).
+
+ int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode());
+ if (AltOpc == -1)
+ continue;
+
+ // This pass is run after register coalescing, and so we're looking for
+ // a situation like this:
+ // ...
+ // %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9
+ // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16,
+ // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16
+ // ...
+ // %vreg9<def,tied1> = XSMADDADP %vreg9<tied0>, %vreg17, %vreg19,
+ // %RM<imp-use>; VSLRC:%vreg9,%vreg17,%vreg19
+ // ...
+ // Where we can eliminate the copy by changing from the A-type to the
+ // M-type instruction. Specifically, for this example, this means:
+ // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16,
+ // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16
+ // is replaced by:
+ // %vreg16<def,tied1> = XSMADDMDP %vreg16<tied0>, %vreg18, %vreg9,
+ // %RM<imp-use>; VSLRC:%vreg16,%vreg18,%vreg9
+ // and we remove: %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9
+
+ SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
+
+ VNInfo *AddendValNo =
+ LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn();
+ MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
+
+ // The addend and this instruction must be in the same block.
+
+ if (!AddendMI || AddendMI->getParent() != MI->getParent())
+ continue;
+
+ // The addend must be a full copy within the same register class.
+
+ if (!AddendMI->isFullCopy())
+ continue;
+
+ unsigned AddendSrcReg = AddendMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) {
+ if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
+ MRI.getRegClass(AddendSrcReg))
+ continue;
+ } else {
+ // If AddendSrcReg is a physical register, make sure the destination
+ // register class contains it.
+ if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
+ ->contains(AddendSrcReg))
+ continue;
+ }
+
+ // In theory, there could be other uses of the addend copy before this
+ // fma. We could deal with this, but that would require additional
+ // logic below and I suspect it will not occur in any relevant
+ // situations. Additionally, check whether the copy source is killed
+ // prior to the fma. In order to replace the addend here with the
+ // source of the copy, it must still be live here. We can't use
+ // interval testing for a physical register, so as long as we're
+ // walking the MIs we may as well test liveness here.
+ bool OtherUsers = false, KillsAddendSrc = false;
+ for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
+ J != JE; --J) {
+ if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
+ OtherUsers = true;
+ break;
+ }
+ if (J->modifiesRegister(AddendSrcReg, TRI) ||
+ J->killsRegister(AddendSrcReg, TRI)) {
+ KillsAddendSrc = true;
+ break;
+ }
+ }
+
+ if (OtherUsers || KillsAddendSrc)
+ continue;
+
+ // Find one of the product operands that is killed by this instruction.
+
+ unsigned KilledProdOp = 0, OtherProdOp = 0;
+ if (LIS->getInterval(MI->getOperand(2).getReg())
+ .Query(FMAIdx).isKill()) {
+ KilledProdOp = 2;
+ OtherProdOp = 3;
+ } else if (LIS->getInterval(MI->getOperand(3).getReg())
+ .Query(FMAIdx).isKill()) {
+ KilledProdOp = 3;
+ OtherProdOp = 2;
+ }
+
+ // If there are no killed product operands, then this transformation is
+ // likely not profitable.
+ if (!KilledProdOp)
+ continue;
+
+ // For virtual registers, verify that the addend source register
+ // is live here (as should have been assured above).
+ assert((!TargetRegisterInfo::isVirtualRegister(AddendSrcReg) ||
+ LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) &&
+ "Addend source register is not live!");
+
+ // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
+
+ unsigned AddReg = AddendMI->getOperand(1).getReg();
+ unsigned KilledProdReg = MI->getOperand(KilledProdOp).getReg();
+ unsigned OtherProdReg = MI->getOperand(OtherProdOp).getReg();
+
+ unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
+ unsigned KilledProdSubReg = MI->getOperand(KilledProdOp).getSubReg();
+ unsigned OtherProdSubReg = MI->getOperand(OtherProdOp).getSubReg();
+
+ bool AddRegKill = AddendMI->getOperand(1).isKill();
+ bool KilledProdRegKill = MI->getOperand(KilledProdOp).isKill();
+ bool OtherProdRegKill = MI->getOperand(OtherProdOp).isKill();
+
+ bool AddRegUndef = AddendMI->getOperand(1).isUndef();
+ bool KilledProdRegUndef = MI->getOperand(KilledProdOp).isUndef();
+ bool OtherProdRegUndef = MI->getOperand(OtherProdOp).isUndef();
+
+ unsigned OldFMAReg = MI->getOperand(0).getReg();
+
+ // The transformation doesn't work well with things like:
+ // %vreg5 = A-form-op %vreg5, %vreg11, %vreg5;
+ // so leave such things alone.
+ if (OldFMAReg == KilledProdReg)
+ continue;
+
+ assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
+ "Addend copy not tied to old FMA output!");
+
+ DEBUG(dbgs() << "VSX FMA Mutation:\n " << *MI;);
+
+ MI->getOperand(0).setReg(KilledProdReg);
+ MI->getOperand(1).setReg(KilledProdReg);
+ MI->getOperand(3).setReg(AddReg);
+ MI->getOperand(2).setReg(OtherProdReg);
+
+ MI->getOperand(0).setSubReg(KilledProdSubReg);
+ MI->getOperand(1).setSubReg(KilledProdSubReg);
+ MI->getOperand(3).setSubReg(AddSubReg);
+ MI->getOperand(2).setSubReg(OtherProdSubReg);
+
+ MI->getOperand(1).setIsKill(KilledProdRegKill);
+ MI->getOperand(3).setIsKill(AddRegKill);
+ MI->getOperand(2).setIsKill(OtherProdRegKill);
+
+ MI->getOperand(1).setIsUndef(KilledProdRegUndef);
+ MI->getOperand(3).setIsUndef(AddRegUndef);
+ MI->getOperand(2).setIsUndef(OtherProdRegUndef);
+
+ MI->setDesc(TII->get(AltOpc));
+
+ DEBUG(dbgs() << " -> " << *MI);
+
+ // The killed product operand was killed here, so we can reuse it now
+ // for the result of the fma.
+
+ LiveInterval &FMAInt = LIS->getInterval(OldFMAReg);
+ VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot());
+ for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end();
+ UI != UE;) {
+ MachineOperand &UseMO = *UI;
+ MachineInstr *UseMI = UseMO.getParent();
+ ++UI;
+
+ // Don't replace the result register of the copy we're about to erase.
+ if (UseMI == AddendMI)
+ continue;
+
+ UseMO.setReg(KilledProdReg);
+ UseMO.setSubReg(KilledProdSubReg);
+ }
+
+ // Extend the live intervals of the killed product operand to hold the
+ // fma result.
+
+ LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg);
+ for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end();
+ AI != AE; ++AI) {
+ // Don't add the segment that corresponds to the original copy.
+ if (AI->valno == AddendValNo)
+ continue;
+
+ VNInfo *NewFMAValNo =
+ NewFMAInt.getNextValue(AI->start,
+ LIS->getVNInfoAllocator());
+
+ NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end,
+ NewFMAValNo));
+ }
+ DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
+
+ FMAInt.removeValNo(FMAValNo);
+ DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
+
+ // Remove the (now unused) copy.
+
+ DEBUG(dbgs() << " removing: " << *AddendMI << '\n');
+ LIS->RemoveMachineInstrFromMaps(AddendMI);
+ AddendMI->eraseFromParent();
+
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // If we don't have VSX then go ahead and return without doing
+ // anything.
+ const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
+ if (!STI.hasVSX())
+ return false;
+
+ LIS = &getAnalysis<LiveIntervals>();
+
+ TII = STI.getInstrInfo();
+
+ bool Changed = false;
+
+ if (DisableVSXFMAMutate)
+ return Changed;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE,
+ "PowerPC VSX FMA Mutation", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE,
+ "PowerPC VSX FMA Mutation", false, false)
+
+char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
+
+char PPCVSXFMAMutate::ID = 0;
+FunctionPass*
+llvm::createPPCVSXFMAMutatePass() { return new PPCVSXFMAMutate(); }
+
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
new file mode 100644
index 000000000000..e238669145ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -0,0 +1,821 @@
+//===----------- PPCVSXSwapRemoval.cpp - Remove VSX LE Swaps -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This pass analyzes vector computations and removes unnecessary
+// doubleword swaps (xxswapd instructions). This pass is performed
+// only for little-endian VSX code generation.
+//
+// For this specific case, loads and stores of v4i32, v4f32, v2i64,
+// and v2f64 vectors are inefficient. These are implemented using
+// the lxvd2x and stxvd2x instructions, which invert the order of
+// doublewords in a vector register. Thus code generation inserts
+// an xxswapd after each such load, and prior to each such store.
+//
+// The extra xxswapd instructions reduce performance. The purpose
+// of this pass is to reduce the number of xxswapd instructions
+// required for correctness.
+//
+// The primary insight is that much code that operates on vectors
+// does not care about the relative order of elements in a register,
+// so long as the correct memory order is preserved. If we have a
+// computation where all input values are provided by lxvd2x/xxswapd,
+// all outputs are stored using xxswapd/lxvd2x, and all intermediate
+// computations are lane-insensitive (independent of element order),
+// then all the xxswapd instructions associated with the loads and
+// stores may be removed without changing observable semantics.
+//
+// This pass uses standard equivalence class infrastructure to create
+// maximal webs of computations fitting the above description. Each
+// such web is then optimized by removing its unnecessary xxswapd
+// instructions.
+//
+// There are some lane-sensitive operations for which we can still
+// permit the optimization, provided we modify those operations
+// accordingly. Such operations are identified as using "special
+// handling" within this module.
+//
+//===---------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-vsx-swaps"
+
+namespace llvm {
+ void initializePPCVSXSwapRemovalPass(PassRegistry&);
+}
+
+namespace {
+
+// A PPCVSXSwapEntry is created for each machine instruction that
+// is relevant to a vector computation.
+struct PPCVSXSwapEntry {
+ // Pointer to the instruction.
+ MachineInstr *VSEMI;
+
+ // Unique ID (position in the swap vector).
+ int VSEId;
+
+ // Attributes of this node.
+ unsigned int IsLoad : 1;
+ unsigned int IsStore : 1;
+ unsigned int IsSwap : 1;
+ unsigned int MentionsPhysVR : 1;
+ unsigned int HasImplicitSubreg : 1;
+ unsigned int IsSwappable : 1;
+ unsigned int SpecialHandling : 3;
+ unsigned int WebRejected : 1;
+ unsigned int WillRemove : 1;
+};
+
+enum SHValues {
+ SH_NONE = 0,
+ SH_EXTRACT,
+ SH_INSERT,
+ SH_NOSWAP_LD,
+ SH_NOSWAP_ST,
+ SH_SPLAT
+};
+
+struct PPCVSXSwapRemoval : public MachineFunctionPass {
+
+ static char ID;
+ const PPCInstrInfo *TII;
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+
+ // Swap entries are allocated in a vector for better performance.
+ std::vector<PPCVSXSwapEntry> SwapVector;
+
+ // A mapping is maintained between machine instructions and
+ // their swap entries. The key is the address of the MI.
+ DenseMap<MachineInstr*, int> SwapMap;
+
+ // Equivalence classes are used to gather webs of related computation.
+ // Swap entries are represented by their VSEId fields.
+ EquivalenceClasses<int> *EC;
+
+ PPCVSXSwapRemoval() : MachineFunctionPass(ID) {
+ initializePPCVSXSwapRemovalPass(*PassRegistry::getPassRegistry());
+ }
+
+private:
+ // Initialize data structures.
+ void initialize(MachineFunction &MFParm);
+
+ // Walk the machine instructions to gather vector usage information.
+ // Return true iff vector mentions are present.
+ bool gatherVectorInstructions();
+
+ // Add an entry to the swap vector and swap map.
+ int addSwapEntry(MachineInstr *MI, PPCVSXSwapEntry &SwapEntry);
+
+ // Hunt backwards through COPY and SUBREG_TO_REG chains for a
+ // source register. VecIdx indicates the swap vector entry to
+ // mark as mentioning a physical register if the search leads
+ // to one.
+ unsigned lookThruCopyLike(unsigned SrcReg, unsigned VecIdx);
+
+ // Generate equivalence classes for related computations (webs).
+ void formWebs();
+
+ // Analyze webs and determine those that cannot be optimized.
+ void recordUnoptimizableWebs();
+
+ // Record which swap instructions can be safely removed.
+ void markSwapsForRemoval();
+
+ // Remove swaps and update other instructions requiring special
+ // handling. Return true iff any changes are made.
+ bool removeSwaps();
+
+ // Update instructions requiring special handling.
+ void handleSpecialSwappables(int EntryIdx);
+
+ // Dump a description of the entries in the swap vector.
+ void dumpSwapVector();
+
+ // Return true iff the given register is in the given class.
+ bool isRegInClass(unsigned Reg, const TargetRegisterClass *RC) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return RC->hasSubClassEq(MRI->getRegClass(Reg));
+ if (RC->contains(Reg))
+ return true;
+ return false;
+ }
+
+ // Return true iff the given register is a full vector register.
+ bool isVecReg(unsigned Reg) {
+ return (isRegInClass(Reg, &PPC::VSRCRegClass) ||
+ isRegInClass(Reg, &PPC::VRRCRegClass));
+ }
+
+public:
+ // Main entry point for this pass.
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // If we don't have VSX on the subtarget, don't do anything.
+ const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
+ if (!STI.hasVSX())
+ return false;
+
+ bool Changed = false;
+ initialize(MF);
+
+ if (gatherVectorInstructions()) {
+ formWebs();
+ recordUnoptimizableWebs();
+ markSwapsForRemoval();
+ Changed = removeSwaps();
+ }
+
+ // FIXME: See the allocation of EC in initialize().
+ delete EC;
+ return Changed;
+ }
+};
+
+// Initialize data structures for this pass. In particular, clear the
+// swap vector and allocate the equivalence class mapping before
+// processing each function.
+void PPCVSXSwapRemoval::initialize(MachineFunction &MFParm) {
+ MF = &MFParm;
+ MRI = &MF->getRegInfo();
+ TII = static_cast<const PPCInstrInfo*>(MF->getSubtarget().getInstrInfo());
+
+ // An initial vector size of 256 appears to work well in practice.
+ // Small/medium functions with vector content tend not to incur a
+ // reallocation at this size. Three of the vector tests in
+ // projects/test-suite reallocate, which seems like a reasonable rate.
+ const int InitialVectorSize(256);
+ SwapVector.clear();
+ SwapVector.reserve(InitialVectorSize);
+
+ // FIXME: Currently we allocate EC each time because we don't have
+ // access to the set representation on which to call clear(). Should
+ // consider adding a clear() method to the EquivalenceClasses class.
+ EC = new EquivalenceClasses<int>;
+}
+
+// Create an entry in the swap vector for each instruction that mentions
+// a full vector register, recording various characteristics of the
+// instructions there.
+bool PPCVSXSwapRemoval::gatherVectorInstructions() {
+ bool RelevantFunction = false;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+
+ bool RelevantInstr = false;
+ bool ImplicitSubreg = false;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (isVecReg(Reg)) {
+ RelevantInstr = true;
+ if (MO.getSubReg() != 0)
+ ImplicitSubreg = true;
+ break;
+ }
+ }
+
+ if (!RelevantInstr)
+ continue;
+
+ RelevantFunction = true;
+
+ // Create a SwapEntry initialized to zeros, then fill in the
+ // instruction and ID fields before pushing it to the back
+ // of the swap vector.
+ PPCVSXSwapEntry SwapEntry{};
+ int VecIdx = addSwapEntry(&MI, SwapEntry);
+
+ if (ImplicitSubreg)
+ SwapVector[VecIdx].HasImplicitSubreg = 1;
+
+ switch(MI.getOpcode()) {
+ default:
+ // Unless noted otherwise, an instruction is considered
+ // safe for the optimization. There are a large number of
+ // such true-SIMD instructions (all vector math, logical,
+ // select, compare, etc.).
+ SwapVector[VecIdx].IsSwappable = 1;
+ break;
+ case PPC::XXPERMDI:
+ // This is a swap if it is of the form XXPERMDI t, s, s, 2.
+ // Unfortunately, MachineCSE ignores COPY and SUBREG_TO_REG, so we
+ // can also see XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), 2,
+ // for example. We have to look through chains of COPY and
+ // SUBREG_TO_REG to find the real source value for comparison.
+ // If the real source value is a physical register, then mark the
+ // XXPERMDI as mentioning a physical register.
+ // Any other form of XXPERMDI is lane-sensitive and unsafe
+ // for the optimization.
+ if (MI.getOperand(3).getImm() == 2) {
+ unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(),
+ VecIdx);
+ unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(),
+ VecIdx);
+ if (trueReg1 == trueReg2)
+ SwapVector[VecIdx].IsSwap = 1;
+ }
+ break;
+ case PPC::LVX:
+ // Non-permuting loads are currently unsafe. We can use special
+ // handling for this in the future. By not marking these as
+ // IsSwap, we ensure computations containing them will be rejected
+ // for now.
+ SwapVector[VecIdx].IsLoad = 1;
+ break;
+ case PPC::LXVD2X:
+ case PPC::LXVW4X:
+ // Permuting loads are marked as both load and swap, and are
+ // safe for optimization.
+ SwapVector[VecIdx].IsLoad = 1;
+ SwapVector[VecIdx].IsSwap = 1;
+ break;
+ case PPC::STVX:
+ // Non-permuting stores are currently unsafe. We can use special
+ // handling for this in the future. By not marking these as
+ // IsSwap, we ensure computations containing them will be rejected
+ // for now.
+ SwapVector[VecIdx].IsStore = 1;
+ break;
+ case PPC::STXVD2X:
+ case PPC::STXVW4X:
+ // Permuting stores are marked as both store and swap, and are
+ // safe for optimization.
+ SwapVector[VecIdx].IsStore = 1;
+ SwapVector[VecIdx].IsSwap = 1;
+ break;
+ case PPC::SUBREG_TO_REG:
+ // These are fine provided they are moving between full vector
+ // register classes. For example, the VRs are a subset of the
+ // VSRs, but each VR and each VSR is a full 128-bit register.
+ if (isVecReg(MI.getOperand(0).getReg()) &&
+ isVecReg(MI.getOperand(2).getReg()))
+ SwapVector[VecIdx].IsSwappable = 1;
+ break;
+ case PPC::COPY:
+ // These are fine provided they are moving between full vector
+ // register classes.
+ if (isVecReg(MI.getOperand(0).getReg()) &&
+ isVecReg(MI.getOperand(1).getReg()))
+ SwapVector[VecIdx].IsSwappable = 1;
+ break;
+ case PPC::VSPLTB:
+ case PPC::VSPLTH:
+ case PPC::VSPLTW:
+ // Splats are lane-sensitive, but we can use special handling
+ // to adjust the source lane for the splat. This is not yet
+ // implemented. When it is, we need to uncomment the following:
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_SPLAT;
+ break;
+ // The presence of the following lane-sensitive operations in a
+ // web will kill the optimization, at least for now. For these
+ // we do nothing, causing the optimization to fail.
+ // FIXME: Some of these could be permitted with special handling,
+ // and will be phased in as time permits.
+ // FIXME: There is no simple and maintainable way to express a set
+ // of opcodes having a common attribute in TableGen. Should this
+ // change, this is a prime candidate to use such a mechanism.
+ case PPC::INLINEASM:
+ case PPC::EXTRACT_SUBREG:
+ case PPC::INSERT_SUBREG:
+ case PPC::COPY_TO_REGCLASS:
+ case PPC::LVEBX:
+ case PPC::LVEHX:
+ case PPC::LVEWX:
+ case PPC::LVSL:
+ case PPC::LVSR:
+ case PPC::LVXL:
+ case PPC::LXVDSX:
+ case PPC::STVEBX:
+ case PPC::STVEHX:
+ case PPC::STVEWX:
+ case PPC::STVXL:
+ case PPC::STXSDX:
+ case PPC::VCIPHER:
+ case PPC::VCIPHERLAST:
+ case PPC::VMRGHB:
+ case PPC::VMRGHH:
+ case PPC::VMRGHW:
+ case PPC::VMRGLB:
+ case PPC::VMRGLH:
+ case PPC::VMRGLW:
+ case PPC::VMULESB:
+ case PPC::VMULESH:
+ case PPC::VMULESW:
+ case PPC::VMULEUB:
+ case PPC::VMULEUH:
+ case PPC::VMULEUW:
+ case PPC::VMULOSB:
+ case PPC::VMULOSH:
+ case PPC::VMULOSW:
+ case PPC::VMULOUB:
+ case PPC::VMULOUH:
+ case PPC::VMULOUW:
+ case PPC::VNCIPHER:
+ case PPC::VNCIPHERLAST:
+ case PPC::VPERM:
+ case PPC::VPERMXOR:
+ case PPC::VPKPX:
+ case PPC::VPKSHSS:
+ case PPC::VPKSHUS:
+ case PPC::VPKSDSS:
+ case PPC::VPKSDUS:
+ case PPC::VPKSWSS:
+ case PPC::VPKSWUS:
+ case PPC::VPKUDUM:
+ case PPC::VPKUDUS:
+ case PPC::VPKUHUM:
+ case PPC::VPKUHUS:
+ case PPC::VPKUWUM:
+ case PPC::VPKUWUS:
+ case PPC::VPMSUMB:
+ case PPC::VPMSUMD:
+ case PPC::VPMSUMH:
+ case PPC::VPMSUMW:
+ case PPC::VRLB:
+ case PPC::VRLD:
+ case PPC::VRLH:
+ case PPC::VRLW:
+ case PPC::VSBOX:
+ case PPC::VSHASIGMAD:
+ case PPC::VSHASIGMAW:
+ case PPC::VSL:
+ case PPC::VSLDOI:
+ case PPC::VSLO:
+ case PPC::VSR:
+ case PPC::VSRO:
+ case PPC::VSUM2SWS:
+ case PPC::VSUM4SBS:
+ case PPC::VSUM4SHS:
+ case PPC::VSUM4UBS:
+ case PPC::VSUMSWS:
+ case PPC::VUPKHPX:
+ case PPC::VUPKHSB:
+ case PPC::VUPKHSH:
+ case PPC::VUPKHSW:
+ case PPC::VUPKLPX:
+ case PPC::VUPKLSB:
+ case PPC::VUPKLSH:
+ case PPC::VUPKLSW:
+ case PPC::XXMRGHW:
+ case PPC::XXMRGLW:
+ case PPC::XXSPLTW:
+ break;
+ }
+ }
+ }
+
+ if (RelevantFunction) {
+ DEBUG(dbgs() << "Swap vector when first built\n\n");
+ dumpSwapVector();
+ }
+
+ return RelevantFunction;
+}
+
+// Add an entry to the swap vector and swap map, and make a
+// singleton equivalence class for the entry.
+int PPCVSXSwapRemoval::addSwapEntry(MachineInstr *MI,
+ PPCVSXSwapEntry& SwapEntry) {
+ SwapEntry.VSEMI = MI;
+ SwapEntry.VSEId = SwapVector.size();
+ SwapVector.push_back(SwapEntry);
+ EC->insert(SwapEntry.VSEId);
+ SwapMap[MI] = SwapEntry.VSEId;
+ return SwapEntry.VSEId;
+}
+
+// This is used to find the "true" source register for an
+// XXPERMDI instruction, since MachineCSE does not handle the
+// "copy-like" operations (Copy and SubregToReg). Returns
+// the original SrcReg unless it is the target of a copy-like
+// operation, in which case we chain backwards through all
+// such operations to the ultimate source register. If a
+// physical register is encountered, we stop the search and
+// flag the swap entry indicated by VecIdx (the original
+// XXPERMDI) as mentioning a physical register. Similarly
+// for implicit subregister mentions (which should never
+// happen).
+unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
+ unsigned VecIdx) {
+ MachineInstr *MI = MRI->getVRegDef(SrcReg);
+ if (!MI->isCopyLike())
+ return SrcReg;
+
+ unsigned CopySrcReg, CopySrcSubreg;
+ if (MI->isCopy()) {
+ CopySrcReg = MI->getOperand(1).getReg();
+ CopySrcSubreg = MI->getOperand(1).getSubReg();
+ } else {
+ assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
+ CopySrcReg = MI->getOperand(2).getReg();
+ CopySrcSubreg = MI->getOperand(2).getSubReg();
+ }
+
+ if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) {
+ SwapVector[VecIdx].MentionsPhysVR = 1;
+ return CopySrcReg;
+ }
+
+ if (CopySrcSubreg != 0) {
+ SwapVector[VecIdx].HasImplicitSubreg = 1;
+ return CopySrcReg;
+ }
+
+ return lookThruCopyLike(CopySrcReg, VecIdx);
+}
+
+// Generate equivalence classes for related computations (webs) by
+// def-use relationships of virtual registers. Mention of a physical
+// register terminates the generation of equivalence classes as this
+// indicates a use of a parameter, definition of a return value, use
+// of a value returned from a call, or definition of a parameter to a
+// call. Computations with physical register mentions are flagged
+// as such so their containing webs will not be optimized.
+void PPCVSXSwapRemoval::formWebs() {
+
+ DEBUG(dbgs() << "\n*** Forming webs for swap removal ***\n\n");
+
+ for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
+
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+
+ DEBUG(dbgs() << "\n" << SwapVector[EntryIdx].VSEId << " ");
+ DEBUG(MI->dump());
+
+ // It's sufficient to walk vector uses and join them to their unique
+ // definitions. In addition, check *all* vector register operands
+ // for physical regs.
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (!isVecReg(Reg))
+ continue;
+
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ SwapVector[EntryIdx].MentionsPhysVR = 1;
+ continue;
+ }
+
+ if (!MO.isUse())
+ continue;
+
+ MachineInstr* DefMI = MRI->getVRegDef(Reg);
+ assert(SwapMap.find(DefMI) != SwapMap.end() &&
+ "Inconsistency: def of vector reg not found in swap map!");
+ int DefIdx = SwapMap[DefMI];
+ (void)EC->unionSets(SwapVector[DefIdx].VSEId,
+ SwapVector[EntryIdx].VSEId);
+
+ DEBUG(dbgs() << format("Unioning %d with %d\n", SwapVector[DefIdx].VSEId,
+ SwapVector[EntryIdx].VSEId));
+ DEBUG(dbgs() << " Def: ");
+ DEBUG(DefMI->dump());
+ }
+ }
+}
+
+// Walk the swap vector entries looking for conditions that prevent their
+// containing computations from being optimized. When such conditions are
+// found, mark the representative of the computation's equivalence class
+// as rejected.
+void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
+
+ DEBUG(dbgs() << "\n*** Rejecting webs for swap removal ***\n\n");
+
+ for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
+ int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
+
+ // Reject webs containing mentions of physical registers or implicit
+ // subregs, or containing operations that we don't know how to handle
+ // in a lane-permuted region.
+ if (SwapVector[EntryIdx].MentionsPhysVR ||
+ SwapVector[EntryIdx].HasImplicitSubreg ||
+ !(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) {
+
+ SwapVector[Repr].WebRejected = 1;
+
+ DEBUG(dbgs() <<
+ format("Web %d rejected for physreg, subreg, or not swap[pable]\n",
+ Repr));
+ DEBUG(dbgs() << " in " << EntryIdx << ": ");
+ DEBUG(SwapVector[EntryIdx].VSEMI->dump());
+ DEBUG(dbgs() << "\n");
+ }
+
+ // Reject webs than contain swapping loads that feed something other
+ // than a swap instruction.
+ else if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ unsigned DefReg = MI->getOperand(0).getReg();
+
+ // We skip debug instructions in the analysis. (Note that debug
+ // location information is still maintained by this optimization
+ // because it remains on the LXVD2X and STXVD2X instructions after
+ // the XXPERMDIs are removed.)
+ for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DefReg)) {
+ int UseIdx = SwapMap[&UseMI];
+
+ if (!SwapVector[UseIdx].IsSwap || SwapVector[UseIdx].IsLoad ||
+ SwapVector[UseIdx].IsStore) {
+
+ SwapVector[Repr].WebRejected = 1;
+
+ DEBUG(dbgs() <<
+ format("Web %d rejected for load not feeding swap\n", Repr));
+ DEBUG(dbgs() << " def " << EntryIdx << ": ");
+ DEBUG(MI->dump());
+ DEBUG(dbgs() << " use " << UseIdx << ": ");
+ DEBUG(UseMI.dump());
+ DEBUG(dbgs() << "\n");
+ }
+ }
+
+ // Reject webs than contain swapping stores that are fed by something
+ // other than a swap instruction.
+ } else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ unsigned UseReg = MI->getOperand(0).getReg();
+ MachineInstr *DefMI = MRI->getVRegDef(UseReg);
+ int DefIdx = SwapMap[DefMI];
+
+ if (!SwapVector[DefIdx].IsSwap || SwapVector[DefIdx].IsLoad ||
+ SwapVector[DefIdx].IsStore) {
+
+ SwapVector[Repr].WebRejected = 1;
+
+ DEBUG(dbgs() <<
+ format("Web %d rejected for store not fed by swap\n", Repr));
+ DEBUG(dbgs() << " def " << DefIdx << ": ");
+ DEBUG(DefMI->dump());
+ DEBUG(dbgs() << " use " << EntryIdx << ": ");
+ DEBUG(MI->dump());
+ DEBUG(dbgs() << "\n");
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "Swap vector after web analysis:\n\n");
+ dumpSwapVector();
+}
+
+// Walk the swap vector entries looking for swaps fed by permuting loads
+// and swaps that feed permuting stores. If the containing computation
+// has not been marked rejected, mark each such swap for removal.
+// (Removal is delayed in case optimization has disturbed the pattern,
+// such that multiple loads feed the same swap, etc.)
+void PPCVSXSwapRemoval::markSwapsForRemoval() {
+
+ DEBUG(dbgs() << "\n*** Marking swaps for removal ***\n\n");
+
+ for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
+
+ if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
+ int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
+
+ if (!SwapVector[Repr].WebRejected) {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ unsigned DefReg = MI->getOperand(0).getReg();
+
+ for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DefReg)) {
+ int UseIdx = SwapMap[&UseMI];
+ SwapVector[UseIdx].WillRemove = 1;
+
+ DEBUG(dbgs() << "Marking swap fed by load for removal: ");
+ DEBUG(UseMI.dump());
+ }
+ }
+
+ } else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
+ int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
+
+ if (!SwapVector[Repr].WebRejected) {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ unsigned UseReg = MI->getOperand(0).getReg();
+ MachineInstr *DefMI = MRI->getVRegDef(UseReg);
+ int DefIdx = SwapMap[DefMI];
+ SwapVector[DefIdx].WillRemove = 1;
+
+ DEBUG(dbgs() << "Marking swap feeding store for removal: ");
+ DEBUG(DefMI->dump());
+ }
+
+ } else if (SwapVector[EntryIdx].IsSwappable &&
+ SwapVector[EntryIdx].SpecialHandling != 0) {
+ int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
+
+ if (!SwapVector[Repr].WebRejected)
+ handleSpecialSwappables(EntryIdx);
+ }
+ }
+}
+
+// The identified swap entry requires special handling to allow its
+// containing computation to be optimized. Perform that handling
+// here.
+// FIXME: This code is to be phased in with subsequent patches.
+void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
+ switch (SwapVector[EntryIdx].SpecialHandling) {
+
+ default:
+ assert(false && "Unexpected special handling type");
+ break;
+
+ // For splats based on an index into a vector, add N/2 modulo N
+ // to the index, where N is the number of vector elements.
+ case SHValues::SH_SPLAT: {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ unsigned NElts;
+
+ DEBUG(dbgs() << "Changing splat: ");
+ DEBUG(MI->dump());
+
+ switch (MI->getOpcode()) {
+ default:
+ assert(false && "Unexpected splat opcode");
+ case PPC::VSPLTB: NElts = 16; break;
+ case PPC::VSPLTH: NElts = 8; break;
+ case PPC::VSPLTW: NElts = 4; break;
+ }
+
+ unsigned EltNo = MI->getOperand(1).getImm();
+ EltNo = (EltNo + NElts / 2) % NElts;
+ MI->getOperand(1).setImm(EltNo);
+
+ DEBUG(dbgs() << " Into: ");
+ DEBUG(MI->dump());
+ break;
+ }
+
+ }
+}
+
+// Walk the swap vector and replace each entry marked for removal with
+// a copy operation.
+bool PPCVSXSwapRemoval::removeSwaps() {
+
+ DEBUG(dbgs() << "\n*** Removing swaps ***\n\n");
+
+ bool Changed = false;
+
+ for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
+ if (SwapVector[EntryIdx].WillRemove) {
+ Changed = true;
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ MachineBasicBlock *MBB = MI->getParent();
+ BuildMI(*MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
+ .addOperand(MI->getOperand(1));
+
+ DEBUG(dbgs() << format("Replaced %d with copy: ",
+ SwapVector[EntryIdx].VSEId));
+ DEBUG(MI->dump());
+
+ MI->eraseFromParent();
+ }
+ }
+
+ return Changed;
+}
+
+// For debug purposes, dump the contents of the swap vector.
+void PPCVSXSwapRemoval::dumpSwapVector() {
+
+ for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
+
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ int ID = SwapVector[EntryIdx].VSEId;
+
+ DEBUG(dbgs() << format("%6d", ID));
+ DEBUG(dbgs() << format("%6d", EC->getLeaderValue(ID)));
+ DEBUG(dbgs() << format(" BB#%3d", MI->getParent()->getNumber()));
+ DEBUG(dbgs() << format(" %14s ", TII->getName(MI->getOpcode())));
+
+ if (SwapVector[EntryIdx].IsLoad)
+ DEBUG(dbgs() << "load ");
+ if (SwapVector[EntryIdx].IsStore)
+ DEBUG(dbgs() << "store ");
+ if (SwapVector[EntryIdx].IsSwap)
+ DEBUG(dbgs() << "swap ");
+ if (SwapVector[EntryIdx].MentionsPhysVR)
+ DEBUG(dbgs() << "physreg ");
+ if (SwapVector[EntryIdx].HasImplicitSubreg)
+ DEBUG(dbgs() << "implsubreg ");
+
+ if (SwapVector[EntryIdx].IsSwappable) {
+ DEBUG(dbgs() << "swappable ");
+ switch(SwapVector[EntryIdx].SpecialHandling) {
+ default:
+ DEBUG(dbgs() << "special:**unknown**");
+ break;
+ case SH_NONE:
+ break;
+ case SH_EXTRACT:
+ DEBUG(dbgs() << "special:extract ");
+ break;
+ case SH_INSERT:
+ DEBUG(dbgs() << "special:insert ");
+ break;
+ case SH_NOSWAP_LD:
+ DEBUG(dbgs() << "special:load ");
+ break;
+ case SH_NOSWAP_ST:
+ DEBUG(dbgs() << "special:store ");
+ break;
+ case SH_SPLAT:
+ DEBUG(dbgs() << "special:splat ");
+ break;
+ }
+ }
+
+ if (SwapVector[EntryIdx].WebRejected)
+ DEBUG(dbgs() << "rejected ");
+ if (SwapVector[EntryIdx].WillRemove)
+ DEBUG(dbgs() << "remove ");
+
+ DEBUG(dbgs() << "\n");
+
+ // For no-asserts builds.
+ (void)MI;
+ (void)ID;
+ }
+
+ DEBUG(dbgs() << "\n");
+}
+
+} // end default namespace
+
+INITIALIZE_PASS_BEGIN(PPCVSXSwapRemoval, DEBUG_TYPE,
+ "PowerPC VSX Swap Removal", false, false)
+INITIALIZE_PASS_END(PPCVSXSwapRemoval, DEBUG_TYPE,
+ "PowerPC VSX Swap Removal", false, false)
+
+char PPCVSXSwapRemoval::ID = 0;
+FunctionPass*
+llvm::createPPCVSXSwapRemovalPass() { return new PPCVSXSwapRemoval(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 5727dbc4170d..5b2fe19837ef 100644
--- a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -14,7 +14,7 @@ using namespace llvm;
Target llvm::ThePPC32Target, llvm::ThePPC64Target, llvm::ThePPC64LETarget;
-extern "C" void LLVMInitializePowerPCTargetInfo() {
+extern "C" void LLVMInitializePowerPCTargetInfo() {
RegisterTarget<Triple::ppc, /*HasJIT=*/true>
X(ThePPC32Target, "ppc32", "PowerPC 32");