author     Dimitry Andric <dim@FreeBSD.org>    2020-01-17 20:45:01 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2020-01-17 20:45:01 +0000
commit     706b4fc47bbc608932d3b491ae19a3b9cde9497b (patch)
tree       4adf86a776049cbf7f69a1929c4babcbbef925eb /llvm/lib/Target/PowerPC
parent     7cc9cf2bf09f069cb2dd947ead05d0b54301fb71 (diff)
Vendor import of llvm-project master e26a78e70, the last commit before
the llvmorg-11-init tag, from which release/10.x was branched.
Notes:
svn path=/vendor/llvm-project/master/; revision=356843
svn path=/vendor/llvm-project/llvmorg-10-init-17466-ge26a78e7085/; revision=356844; tag=vendor/llvm-project/llvmorg-10-init-17466-ge26a78e7085
Diffstat (limited to 'llvm/lib/Target/PowerPC')
-rw-r--r--  llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 90
-rw-r--r--  llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp | 6
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp | 8
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h | 6
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 28
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h | 7
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 23
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 24
-rw-r--r--  llvm/lib/Target/PowerPC/P9InstrResources.td | 229
-rw-r--r--  llvm/lib/Target/PowerPC/PPC.h | 11
-rw-r--r--  llvm/lib/Target/PowerPC/PPC.td | 175
-rw-r--r--  llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 356
-rw-r--r--  llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp | 1
-rw-r--r--  llvm/lib/Target/PowerPC/PPCCTRLoops.cpp | 4
-rw-r--r--  llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 7
-rw-r--r--  llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp | 4
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 401
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 1357
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.h | 936
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 78
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 121
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrFormats.td | 61
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrHTM.td | 16
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 549
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.h | 14
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.td | 383
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrVSX.td | 190
-rw-r--r--  llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp (renamed from llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp) | 441
-rw-r--r--  llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp | 164
-rw-r--r--  llvm/lib/Target/PowerPC/PPCMIPeephole.cpp | 390
-rw-r--r--  llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h | 2
-rw-r--r--  llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp | 25
-rw-r--r--  llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp | 13
-rw-r--r--  llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 35
-rw-r--r--  llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 9
-rw-r--r--  llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 7
-rw-r--r--  llvm/lib/Target/PowerPC/PPCSubtarget.h | 45
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 1
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 20
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 96
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 17
-rw-r--r--  llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp | 1
-rw-r--r--  llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp | 2
43 files changed, 4044 insertions, 2309 deletions
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index aedf5b713c3f..7e7902c27a81 100644
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -800,9 +800,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
Inst = TmpInst;
break;
}
- case PPC::SUBICo: {
+ case PPC::SUBIC_rec: {
MCInst TmpInst;
- TmpInst.setOpcode(PPC::ADDICo);
+ TmpInst.setOpcode(PPC::ADDIC_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
addNegOperand(TmpInst, Inst.getOperand(2), getContext());
@@ -810,11 +810,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::EXTLWI:
- case PPC::EXTLWIo: {
+ case PPC::EXTLWI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
int64_t B = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::EXTLWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.setOpcode(Opcode == PPC::EXTLWI ? PPC::RLWINM : PPC::RLWINM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(B));
@@ -824,11 +824,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::EXTRWI:
- case PPC::EXTRWIo: {
+ case PPC::EXTRWI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
int64_t B = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::EXTRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.setOpcode(Opcode == PPC::EXTRWI ? PPC::RLWINM : PPC::RLWINM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(B + N));
@@ -838,11 +838,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::INSLWI:
- case PPC::INSLWIo: {
+ case PPC::INSLWI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
int64_t B = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::INSLWI? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.setOpcode(Opcode == PPC::INSLWI ? PPC::RLWIMI : PPC::RLWIMI_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
@@ -853,11 +853,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::INSRWI:
- case PPC::INSRWIo: {
+ case PPC::INSRWI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
int64_t B = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::INSRWI? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.setOpcode(Opcode == PPC::INSRWI ? PPC::RLWIMI : PPC::RLWIMI_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
@@ -868,10 +868,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::ROTRWI:
- case PPC::ROTRWIo: {
+ case PPC::ROTRWI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(Opcode == PPC::ROTRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.setOpcode(Opcode == PPC::ROTRWI ? PPC::RLWINM : PPC::RLWINM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(32 - N));
@@ -881,10 +881,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::SLWI:
- case PPC::SLWIo: {
+ case PPC::SLWI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(Opcode == PPC::SLWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.setOpcode(Opcode == PPC::SLWI ? PPC::RLWINM : PPC::RLWINM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(N));
@@ -894,10 +894,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::SRWI:
- case PPC::SRWIo: {
+ case PPC::SRWI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(Opcode == PPC::SRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.setOpcode(Opcode == PPC::SRWI ? PPC::RLWINM : PPC::RLWINM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(32 - N));
@@ -907,10 +907,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::CLRRWI:
- case PPC::CLRRWIo: {
+ case PPC::CLRRWI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(Opcode == PPC::CLRRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.setOpcode(Opcode == PPC::CLRRWI ? PPC::RLWINM : PPC::RLWINM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(0));
@@ -920,11 +920,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::CLRLSLWI:
- case PPC::CLRLSLWIo: {
+ case PPC::CLRLSLWI_rec: {
MCInst TmpInst;
int64_t B = Inst.getOperand(2).getImm();
int64_t N = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::CLRLSLWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.setOpcode(Opcode == PPC::CLRLSLWI ? PPC::RLWINM : PPC::RLWINM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(N));
@@ -934,11 +934,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::EXTLDI:
- case PPC::EXTLDIo: {
+ case PPC::EXTLDI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
int64_t B = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::EXTLDI? PPC::RLDICR : PPC::RLDICRo);
+ TmpInst.setOpcode(Opcode == PPC::EXTLDI ? PPC::RLDICR : PPC::RLDICR_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(B));
@@ -947,11 +947,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::EXTRDI:
- case PPC::EXTRDIo: {
+ case PPC::EXTRDI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
int64_t B = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::EXTRDI? PPC::RLDICL : PPC::RLDICLo);
+ TmpInst.setOpcode(Opcode == PPC::EXTRDI ? PPC::RLDICL : PPC::RLDICL_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(B + N));
@@ -960,11 +960,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::INSRDI:
- case PPC::INSRDIo: {
+ case PPC::INSRDI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
int64_t B = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::INSRDI? PPC::RLDIMI : PPC::RLDIMIo);
+ TmpInst.setOpcode(Opcode == PPC::INSRDI ? PPC::RLDIMI : PPC::RLDIMI_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
@@ -974,10 +974,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::ROTRDI:
- case PPC::ROTRDIo: {
+ case PPC::ROTRDI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(Opcode == PPC::ROTRDI? PPC::RLDICL : PPC::RLDICLo);
+ TmpInst.setOpcode(Opcode == PPC::ROTRDI ? PPC::RLDICL : PPC::RLDICL_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(64 - N));
@@ -986,10 +986,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::SLDI:
- case PPC::SLDIo: {
+ case PPC::SLDI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(Opcode == PPC::SLDI? PPC::RLDICR : PPC::RLDICRo);
+ TmpInst.setOpcode(Opcode == PPC::SLDI ? PPC::RLDICR : PPC::RLDICR_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(N));
@@ -1007,10 +1007,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::SRDI:
- case PPC::SRDIo: {
+ case PPC::SRDI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(Opcode == PPC::SRDI? PPC::RLDICL : PPC::RLDICLo);
+ TmpInst.setOpcode(Opcode == PPC::SRDI ? PPC::RLDICL : PPC::RLDICL_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(64 - N));
@@ -1019,10 +1019,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::CLRRDI:
- case PPC::CLRRDIo: {
+ case PPC::CLRRDI_rec: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(Opcode == PPC::CLRRDI? PPC::RLDICR : PPC::RLDICRo);
+ TmpInst.setOpcode(Opcode == PPC::CLRRDI ? PPC::RLDICR : PPC::RLDICR_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(0));
@@ -1031,11 +1031,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::CLRLSLDI:
- case PPC::CLRLSLDIo: {
+ case PPC::CLRLSLDI_rec: {
MCInst TmpInst;
int64_t B = Inst.getOperand(2).getImm();
int64_t N = Inst.getOperand(3).getImm();
- TmpInst.setOpcode(Opcode == PPC::CLRLSLDI? PPC::RLDIC : PPC::RLDICo);
+ TmpInst.setOpcode(Opcode == PPC::CLRLSLDI ? PPC::RLDIC : PPC::RLDIC_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(N));
@@ -1044,14 +1044,14 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::RLWINMbm:
- case PPC::RLWINMobm: {
+ case PPC::RLWINMbm_rec: {
unsigned MB, ME;
int64_t BM = Inst.getOperand(3).getImm();
if (!isRunOfOnes(BM, MB, ME))
break;
MCInst TmpInst;
- TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(Inst.getOperand(2));
@@ -1061,14 +1061,14 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::RLWIMIbm:
- case PPC::RLWIMIobm: {
+ case PPC::RLWIMIbm_rec: {
unsigned MB, ME;
int64_t BM = Inst.getOperand(3).getImm();
if (!isRunOfOnes(BM, MB, ME))
break;
MCInst TmpInst;
- TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMI_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(0)); // The tied operand.
TmpInst.addOperand(Inst.getOperand(1));
@@ -1079,14 +1079,14 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::RLWNMbm:
- case PPC::RLWNMobm: {
+ case PPC::RLWNMbm_rec: {
unsigned MB, ME;
int64_t BM = Inst.getOperand(3).getImm();
if (!isRunOfOnes(BM, MB, ME))
break;
MCInst TmpInst;
- TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? PPC::RLWNM : PPC::RLWNMo);
+ TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? PPC::RLWNM : PPC::RLWNM_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(Inst.getOperand(2));
@@ -1116,8 +1116,8 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
case PPC::CP_PASTEx :
case PPC::CP_PASTE_LAST: {
MCInst TmpInst;
- TmpInst.setOpcode(Opcode == PPC::CP_PASTEx ?
- PPC::CP_PASTE : PPC::CP_PASTEo);
+ TmpInst.setOpcode(Opcode == PPC::CP_PASTEx ? PPC::CP_PASTE
+ : PPC::CP_PASTE_rec);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::createImm(Opcode == PPC::CP_PASTEx ? 0 : 1));
@@ -1786,7 +1786,7 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
/// Force static initialization.
-extern "C" void LLVMInitializePowerPCAsmParser() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCAsmParser() {
RegisterMCAsmParser<PPCAsmParser> A(getThePPC32Target());
RegisterMCAsmParser<PPCAsmParser> B(getThePPC64Target());
RegisterMCAsmParser<PPCAsmParser> C(getThePPC64LETarget());
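
The bulk of the PPCAsmParser.cpp churn above is the mechanical rename of LLVM's record-form ("dot") opcode definitions from a trailing "o" (RLWINMo) to a "_rec" suffix (RLWINM_rec). The extended-mnemonic expansions themselves are unchanged: each 32-bit alias still reduces to an rlwinm with a computed shift/mask-begin/mask-end triple, and the parser picks RLWINM or RLWINM_rec depending only on whether the source mnemonic carried the record dot. A standalone C++ sketch of a few of those reductions, for illustration only (the expand* helpers are hypothetical, not code from the tree):

#include <cstdint>
#include <cstdio>

// rlwinm rA,rS,SH,MB,ME: rotate left word by SH, then AND with the mask
// running from big-endian bit MB through bit ME.
struct RLWINMOps { int64_t SH, MB, ME; };

// slwi rA,rS,N   ==  rlwinm rA,rS,N,0,31-N    (shift left immediate)
static RLWINMOps expandSLWI(int64_t N) { return {N, 0, 31 - N}; }

// srwi rA,rS,N   ==  rlwinm rA,rS,32-N,N,31   (shift right immediate)
static RLWINMOps expandSRWI(int64_t N) { return {32 - N, N, 31}; }

// extlwi rA,rS,N,B == rlwinm rA,rS,B,0,N-1    (extract and left-justify)
static RLWINMOps expandEXTLWI(int64_t N, int64_t B) { return {B, 0, N - 1}; }

// clrrwi rA,rS,N == rlwinm rA,rS,0,0,31-N     (clear rightmost N bits)
static RLWINMOps expandCLRRWI(int64_t N) { return {0, 0, 31 - N}; }

int main() {
  const RLWINMOps Cases[] = {expandSLWI(4), expandSRWI(4),
                             expandEXTLWI(8, 16), expandCLRRWI(3)};
  for (const RLWINMOps &Op : Cases)
    std::printf("rlwinm rA,rS,%lld,%lld,%lld\n", (long long)Op.SH,
                (long long)Op.MB, (long long)Op.ME);
  return 0;
}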
diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 3597fd15eeb1..e3c0f958c7ed 100644
--- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -34,7 +34,6 @@ public:
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
raw_ostream &CStream) const override;
};
} // end anonymous namespace
@@ -51,7 +50,7 @@ static MCDisassembler *createPPCLEDisassembler(const Target &T,
return new PPCDisassembler(STI, Ctx, /*IsLittleEndian=*/true);
}
-extern "C" void LLVMInitializePowerPCDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCDisassembler() {
// Register the disassembler for each target.
TargetRegistry::RegisterMCDisassembler(getThePPC32Target(),
createPPCDisassembler);
@@ -323,7 +322,7 @@ static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm,
DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
- uint64_t Address, raw_ostream &OS,
+ uint64_t Address,
raw_ostream &CS) const {
// Get the four bytes of the instruction.
Size = 4;
@@ -350,4 +349,3 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI);
}
-
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index 7fc231618fa9..9cc1c539e24a 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -64,8 +64,9 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << RegName;
}
-void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot, const MCSubtargetInfo &STI) {
+void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
// Customize printing of the addis instruction on AIX. When an operand is a
// symbol reference, the instruction syntax is changed to look like a load
// operation, i.e:
@@ -193,11 +194,10 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
}
if (!printAliasInstr(MI, O))
- printInstruction(MI, O);
+ printInstruction(MI, Address, O);
printAnnotation(O, Annot);
}
-
void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O,
const char *Modifier) {
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
index 725ae2a7081b..a3ec41aa348d 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
@@ -32,11 +32,11 @@ public:
: MCInstPrinter(MAI, MII, MRI), TT(T) {}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 1216cd727289..dc2c216a3efd 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -15,33 +15,6 @@
using namespace llvm;
-void PPCMCAsmInfoDarwin::anchor() { }
-
-PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
- if (is64Bit) {
- CodePointerSize = CalleeSaveStackSlotSize = 8;
- }
- IsLittleEndian = false;
-
- SeparatorString = "@";
- CommentString = ";";
- ExceptionsType = ExceptionHandling::DwarfCFI;
-
- if (!is64Bit)
- Data64bitsDirective = nullptr; // We can't emit a 64-bit unit in PPC32 mode.
-
- AssemblerDialect = 1; // New-Style mnemonics.
- SupportsDebugInformation= true; // Debug information.
-
- // The installed assembler for OSX < 10.6 lacks some directives.
- // FIXME: this should really be a check on the assembler characteristics
- // rather than OS version
- if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6))
- HasWeakDefCanBeHiddenDirective = false;
-
- UseIntegratedAssembler = true;
-}
-
void PPCELFMCAsmInfo::anchor() { }
PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
@@ -87,4 +60,5 @@ PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) {
assert(!IsLittleEndian && "Little-endian XCOFF not supported.");
CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 8 : 4;
ZeroDirective = "\t.space\t";
+ SymbolsHaveSMC = true;
}
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 42cb62ad26a4..8c52bbbd8a56 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -20,13 +20,6 @@
namespace llvm {
class Triple;
-class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
- virtual void anchor();
-
-public:
- explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple &);
-};
-
class PPCELFMCAsmInfo : public MCAsmInfoELF {
void anchor() override;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 90c3c8d20edb..cbfb8e2ff282 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -30,6 +30,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
@@ -76,14 +77,13 @@ static MCSubtargetInfo *createPPCMCSubtargetInfo(const Triple &TT,
}
static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI,
- const Triple &TheTriple) {
+ const Triple &TheTriple,
+ const MCTargetOptions &Options) {
bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
TheTriple.getArch() == Triple::ppc64le);
MCAsmInfo *MAI;
- if (TheTriple.isOSDarwin())
- MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
- else if (TheTriple.isOSBinFormatXCOFF())
+ if (TheTriple.isOSBinFormatXCOFF())
MAI = new PPCXCOFFMCAsmInfo(isPPC64, TheTriple);
else
MAI = new PPCELFMCAsmInfo(isPPC64, TheTriple);
@@ -107,8 +107,11 @@ public:
: PPCTargetStreamer(S), OS(OS) {}
void emitTCEntry(const MCSymbol &S) override {
+ const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
OS << "\t.tc ";
- OS << S.getName();
+ OS << (MAI->getSymbolsHaveSMC()
+ ? cast<MCSymbolXCOFF>(S).getUnqualifiedName()
+ : S.getName());
OS << "[TC],";
OS << S.getName();
OS << '\n';
@@ -196,7 +199,8 @@ public:
void finish() override {
for (auto *Sym : UpdateOther)
- copyLocalEntry(Sym, Sym->getVariableValue());
+ if (Sym->isVariable())
+ copyLocalEntry(Sym, Sym->getVariableValue());
}
private:
@@ -242,7 +246,10 @@ public:
PPCTargetXCOFFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
void emitTCEntry(const MCSymbol &S) override {
- report_fatal_error("TOC entries not supported yet.");
+ const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
+ const unsigned PointerSize = MAI->getCodePointerSize();
+ Streamer.EmitValueToAlignment(PointerSize);
+ Streamer.EmitSymbolValue(&S, PointerSize);
}
void emitMachine(StringRef CPU) override {
@@ -285,7 +292,7 @@ static MCInstPrinter *createPPCMCInstPrinter(const Triple &T,
return new PPCInstPrinter(MAI, MII, MRI, T);
}
-extern "C" void LLVMInitializePowerPCTargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetMC() {
for (Target *T :
{&getThePPC32Target(), &getThePPC64Target(), &getThePPC64LETarget()}) {
// Register the MC asm info.
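
On XCOFF, symbol names carry a bracketed storage-mapping-class qualifier, e.g. foo[TC]; the new SymbolsHaveSMC flag is what lets the .tc printer above put the unqualified name on the left-hand side via MCSymbolXCOFF::getUnqualifiedName. A minimal sketch of that stripping, assuming the real method behaves along these lines (illustrative stand-in only, not the MCSymbolXCOFF implementation):

#include <cassert>
#include <string>

// Hypothetical stand-in for MCSymbolXCOFF::getUnqualifiedName: drop a
// trailing "[SMC]" qualifier if one is present, otherwise return the name.
static std::string getUnqualifiedName(const std::string &Name) {
  if (!Name.empty() && Name.back() == ']') {
    std::string::size_type Open = Name.rfind('[');
    if (Open != std::string::npos)
      return Name.substr(0, Open);
  }
  return Name;
}

int main() {
  assert(getUnqualifiedName("foo[TC]") == "foo");
  assert(getUnqualifiedName("bar") == "bar");
  return 0;
}

The XCOFF streamer path, by contrast, no longer reports a fatal error for TOC entries: it aligns to the code pointer size and emits the symbol's value directly.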
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 74b67bd2e928..49443679bb31 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -82,6 +82,30 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
return false;
}
+static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) {
+ if (!Val)
+ return false;
+
+ if (isShiftedMask_64(Val)) {
+ // look for the first non-zero bit
+ MB = countLeadingZeros(Val);
+ // look for the first zero bit after the run of ones
+ ME = countLeadingZeros((Val - 1) ^ Val);
+ return true;
+ } else {
+ Val = ~Val; // invert mask
+ if (isShiftedMask_64(Val)) {
+ // effectively look for the first zero bit
+ ME = countLeadingZeros(Val) - 1;
+ // effectively look for the first one bit after the run of zeros
+ MB = countLeadingZeros((Val - 1) ^ Val) + 1;
+ return true;
+ }
+ }
+ // no run present
+ return false;
+}
+
} // end namespace llvm
// Generated files will use "namespace PPC". To avoid symbol clash,
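
The new isRunOfOnes64 mirrors the existing 32-bit helper: it reports whether the set bits of Val form one contiguous run, possibly wrapping around the word, and returns the run's endpoints as PowerPC big-endian bit indices (bit 0 is the most significant bit). A self-contained sketch of the same logic, with C++20's <bit> standing in for LLVM's countLeadingZeros and isShiftedMask_64 (illustration only):

#include <bit>
#include <cassert>
#include <cstdint>

// True iff V is a single contiguous block of ones (a "shifted mask").
static bool isShiftedMask(uint64_t V) {
  if (!V)
    return false;
  uint64_t M = V | (V - 1);   // fill the trailing zeros with ones
  return (M & (M + 1)) == 0;  // contiguous iff that leaves a low mask
}

static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) {
  if (!Val)
    return false;
  if (isShiftedMask(Val)) {
    MB = std::countl_zero(Val);             // first one bit from the top
    ME = std::countl_zero((Val - 1) ^ Val); // last one bit of the run
    return true;
  }
  Val = ~Val; // a wrapping run of ones is a non-wrapping run of zeros
  if (isShiftedMask(Val)) {
    ME = std::countl_zero(Val) - 1;
    MB = std::countl_zero((Val - 1) ^ Val) + 1;
    return true;
  }
  return false; // more than one run present
}

int main() {
  unsigned MB, ME;
  assert(isRunOfOnes64(0x0000000000000FF0, MB, ME) && MB == 52 && ME == 59);
  // Wrapping mask: ones from bit 60 around through bit 3.
  assert(isRunOfOnes64(0xF00000000000000F, MB, ME) && MB == 60 && ME == 3);
  assert(!isRunOfOnes64(0x0000000000000F0F, MB, ME)); // two separate runs
  return 0;
}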
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index f6cd8ed00c82..9b3d13989ee2 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -107,7 +107,7 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
(instregex "XSMAX(C|J)?DP$"),
(instregex "XSMIN(C|J)?DP$"),
(instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
- (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
+ (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
(instregex "POPCNT(D|W)$"),
(instregex "CMPB(8)?$"),
(instregex "SETB(8)?$"),
@@ -129,26 +129,26 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
(instregex "MTV(S)?RW(A|Z)$"),
(instregex "CMP(WI|LWI|W|LW)(8)?$"),
(instregex "CMP(L)?D(I)?$"),
- (instregex "SUBF(I)?C(8)?$"),
- (instregex "ANDI(S)?o(8)?$"),
- (instregex "ADDC(8)?$"),
- (instregex "ADDIC(8)?(o)?$"),
- (instregex "ADD(8|4)(o)?$"),
- (instregex "ADD(E|ME|ZE)(8)?(o)?$"),
- (instregex "SUBF(E|ME|ZE)?(8)?(o)?$"),
- (instregex "NEG(8)?(o)?$"),
+ (instregex "SUBF(I)?C(8)?(O)?$"),
+ (instregex "ANDI(S)?(8)?(_rec)?$"),
+ (instregex "ADDC(8)?(O)?$"),
+ (instregex "ADDIC(8)?(_rec)?$"),
+ (instregex "ADD(8|4)(O)?(_rec)?$"),
+ (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
+ (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
+ (instregex "NEG(8)?(O)?(_rec)?$"),
(instregex "POPCNTB$"),
(instregex "ADD(I|IS)?(8)?$"),
(instregex "LI(S)?(8)?$"),
- (instregex "(X)?OR(I|IS)?(8)?(o)?$"),
- (instregex "NAND(8)?(o)?$"),
- (instregex "AND(C)?(8)?(o)?$"),
- (instregex "NOR(8)?(o)?$"),
- (instregex "OR(C)?(8)?(o)?$"),
- (instregex "EQV(8)?(o)?$"),
- (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"),
+ (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
+ (instregex "NAND(8)?(_rec)?$"),
+ (instregex "AND(C)?(8)?(_rec)?$"),
+ (instregex "NOR(8)?(_rec)?$"),
+ (instregex "OR(C)?(8)?(_rec)?$"),
+ (instregex "EQV(8)?(_rec)?$"),
+ (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
(instregex "ADD(4|8)(TLS)?(_)?$"),
- (instregex "NEG(8)?$"),
+ (instregex "NEG(8)?(O)?$"),
(instregex "ADDI(S)?toc(HA|L)(8)?$"),
COPY,
MCRF,
@@ -211,8 +211,8 @@ def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instregex "VABSDU(B|H|W)$"),
(instregex "VADDU(B|H|W)S$"),
(instregex "VAVG(S|U)(B|H|W)$"),
- (instregex "VCMP(EQ|GE|GT)FP(o)?$"),
- (instregex "VCMPBFP(o)?$"),
+ (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
+ (instregex "VCMPBFP(_rec)?$"),
(instregex "VC(L|T)Z(B|H|W|D)$"),
(instregex "VADDS(B|H|W)S$"),
(instregex "V(MIN|MAX)FP$"),
@@ -233,43 +233,43 @@ def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
VSUBUWS,
VSUBCUW,
VCMPGTSB,
- VCMPGTSBo,
+ VCMPGTSB_rec,
VCMPGTSD,
- VCMPGTSDo,
+ VCMPGTSD_rec,
VCMPGTSH,
- VCMPGTSHo,
+ VCMPGTSH_rec,
VCMPGTSW,
- VCMPGTSWo,
+ VCMPGTSW_rec,
VCMPGTUB,
- VCMPGTUBo,
+ VCMPGTUB_rec,
VCMPGTUD,
- VCMPGTUDo,
+ VCMPGTUD_rec,
VCMPGTUH,
- VCMPGTUHo,
+ VCMPGTUH_rec,
VCMPGTUW,
- VCMPGTUWo,
- VCMPNEBo,
- VCMPNEHo,
- VCMPNEWo,
- VCMPNEZBo,
- VCMPNEZHo,
- VCMPNEZWo,
- VCMPEQUBo,
- VCMPEQUDo,
- VCMPEQUHo,
- VCMPEQUWo,
+ VCMPGTUW_rec,
+ VCMPNEB_rec,
+ VCMPNEH_rec,
+ VCMPNEW_rec,
+ VCMPNEZB_rec,
+ VCMPNEZH_rec,
+ VCMPNEZW_rec,
+ VCMPEQUB_rec,
+ VCMPEQUD_rec,
+ VCMPEQUH_rec,
+ VCMPEQUW_rec,
XVCMPEQDP,
- XVCMPEQDPo,
+ XVCMPEQDP_rec,
XVCMPEQSP,
- XVCMPEQSPo,
+ XVCMPEQSP_rec,
XVCMPGEDP,
- XVCMPGEDPo,
+ XVCMPGEDP_rec,
XVCMPGESP,
- XVCMPGESPo,
+ XVCMPGESP_rec,
XVCMPGTDP,
- XVCMPGTDPo,
+ XVCMPGTDP_rec,
XVCMPGTSP,
- XVCMPGTSPo,
+ XVCMPGTSP_rec,
XVMAXDP,
XVMAXSP,
XVMINDP,
@@ -399,7 +399,7 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
(instrs
(instregex "MADD(HD|HDU|LD|LD8)$"),
- (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$")
+ (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
)>;
// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
@@ -451,14 +451,14 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- (instregex "FSEL(D|S)o$")
+ (instregex "FSEL(D|S)_rec$")
)>;
// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- (instregex "MUL(H|L)(D|W)(U)?o$")
+ (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
)>;
// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
@@ -467,18 +467,18 @@ def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- (instregex "FRI(N|P|Z|M)(D|S)o$"),
- (instregex "FRE(S)?o$"),
- (instregex "FADD(S)?o$"),
- (instregex "FSUB(S)?o$"),
- (instregex "F(N)?MSUB(S)?o$"),
- (instregex "F(N)?MADD(S)?o$"),
- (instregex "FCFID(U)?(S)?o$"),
- (instregex "FCTID(U)?(Z)?o$"),
- (instregex "FCTIW(U)?(Z)?o$"),
- (instregex "FMUL(S)?o$"),
- (instregex "FRSQRTE(S)?o$"),
- FRSPo
+ (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
+ (instregex "FRE(S)?_rec$"),
+ (instregex "FADD(S)?_rec$"),
+ (instregex "FSUB(S)?_rec$"),
+ (instregex "F(N)?MSUB(S)?_rec$"),
+ (instregex "F(N)?MADD(S)?_rec$"),
+ (instregex "FCFID(U)?(S)?_rec$"),
+ (instregex "FCTID(U)?(Z)?_rec$"),
+ (instregex "FCTIW(U)?(Z)?_rec$"),
+ (instregex "FMUL(S)?_rec$"),
+ (instregex "FRSQRTE(S)?_rec$"),
+ FRSP_rec
)>;
// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch units.
@@ -613,16 +613,16 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
XSCMPUQP,
XSTSTDCQP,
XSXSIGQP,
- BCDCFNo,
- BCDCFZo,
- BCDCPSGNo,
- BCDCTNo,
- BCDCTZo,
- BCDSETSGNo,
- BCDSo,
- BCDTRUNCo,
- BCDUSo,
- BCDUTRUNCo
+ BCDCFN_rec,
+ BCDCFZ_rec,
+ BCDCPSGN_rec,
+ BCDCTN_rec,
+ BCDCTZ_rec,
+ BCDSETSGN_rec,
+ BCDS_rec,
+ BCDTRUNC_rec,
+ BCDUS_rec,
+ BCDUTRUNC_rec
)>;
// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
@@ -630,7 +630,7 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
// dispatch.
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
- BCDSRo,
+ BCDSR_rec,
XSADDQP,
XSADDQPO,
XSCVDPQP,
@@ -654,7 +654,7 @@ def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
// dispatch.
def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
- BCDCTSQo
+ BCDCTSQ_rec
)>;
// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
@@ -679,7 +679,7 @@ def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
// dispatch.
def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
- BCDCFSQo
+ BCDCFSQ_rec
)>;
// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
@@ -819,7 +819,7 @@ def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
DISP_1C, DISP_1C],
(instrs
(instregex "LHA(X)?(8)?$"),
- (instregex "CP_PASTE(8)?o$"),
+ (instregex "CP_PASTE(8)?_rec$"),
(instregex "LWA(X)?(_32)?$"),
TCHECK
)>;
@@ -946,7 +946,9 @@ def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
(instrs
DIVW,
+ DIVWO,
DIVWU,
+ DIVWUO,
MODSW
)>;
@@ -956,9 +958,13 @@ def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
(instrs
DIVWE,
+ DIVWEO,
DIVD,
+ DIVDO,
DIVWEU,
+ DIVWEUO,
DIVDU,
+ DIVDUO,
MODSD,
MODUD,
MODUW
@@ -970,7 +976,9 @@ def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
(instrs
DIVDE,
- DIVDEU
+ DIVDEO,
+ DIVDEU,
+ DIVDEUO
)>;
// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
@@ -979,7 +987,7 @@ def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
DISP_EVEN_1C, DISP_1C],
(instrs
- (instregex "DIVW(U)?(O)?o$")
+ (instregex "DIVW(U)?(O)?_rec$")
)>;
// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
@@ -988,10 +996,14 @@ def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
DISP_EVEN_1C, DISP_1C],
(instrs
- DIVDo,
- DIVDUo,
- DIVWEo,
- DIVWEUo
+ DIVD_rec,
+ DIVDO_rec,
+ DIVDU_rec,
+ DIVDUO_rec,
+ DIVWE_rec,
+ DIVWEO_rec,
+ DIVWEU_rec,
+ DIVWEUO_rec
)>;
// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
@@ -1000,8 +1012,10 @@ def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
DISP_EVEN_1C, DISP_1C],
(instrs
- DIVDEo,
- DIVDEUo
+ DIVDE_rec,
+ DIVDEO_rec,
+ DIVDEU_rec,
+ DIVDEUO_rec
)>;
// CR access instructions in _BrMCR, IIC_BrMCRX.
@@ -1026,8 +1040,8 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
DISP_1C, DISP_1C],
(instrs
- (instregex "ADDC(8)?o$"),
- (instregex "SUBFC(8)?o$")
+ (instregex "ADDC(8)?(O)?_rec$"),
+ (instregex "SUBFC(8)?(O)?_rec$")
)>;
// Cracked ALU operations.
@@ -1038,10 +1052,10 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- (instregex "F(N)?ABS(D|S)o$"),
- (instregex "FCPSGN(D|S)o$"),
- (instregex "FNEG(D|S)o$"),
- FMRo
+ (instregex "F(N)?ABS(D|S)_rec$"),
+ (instregex "FCPSGN(D|S)_rec$"),
+ (instregex "FNEG(D|S)_rec$"),
+ FMR_rec
)>;
// Cracked ALU operations.
@@ -1063,8 +1077,8 @@ def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
- (instregex "MTFSF(b|o)?$"),
- (instregex "MTFSFI(o)?$")
+ (instregex "MTFSF(b|_rec)?$"),
+ (instregex "MTFSFI(_rec)?$")
)>;
// Cracked instruction made of two ALU ops.
@@ -1073,13 +1087,13 @@ def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- (instregex "RLD(I)?C(R|L)o$"),
- (instregex "RLW(IMI|INM|NM)(8)?o$"),
- (instregex "SLW(8)?o$"),
- (instregex "SRAW(I)?o$"),
- (instregex "SRW(8)?o$"),
- RLDICL_32o,
- RLDIMIo
+ (instregex "RLD(I)?C(R|L)_rec$"),
+ (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
+ (instregex "SLW(8)?_rec$"),
+ (instregex "SRAW(I)?_rec$"),
+ (instregex "SRW(8)?_rec$"),
+ RLDICL_32_rec,
+ RLDIMI_rec
)>;
// Cracked instruction made of two ALU ops.
@@ -1088,7 +1102,7 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
- (instregex "MFFS(L|CE|o)?$")
+ (instregex "MFFS(L|CE|_rec)?$")
)>;
// Cracked ALU instruction composed of three consecutive 2 cycle loads for a
@@ -1104,12 +1118,12 @@ def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
// The two ops cannot be done in parallel.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
(instrs
- (instregex "EXTSWSLI_32_64o$"),
- (instregex "SRAD(I)?o$"),
- EXTSWSLIo,
- SLDo,
- SRDo,
- RLDICo
+ (instregex "EXTSWSLI_32_64_rec$"),
+ (instregex "SRAD(I)?_rec$"),
+ EXTSWSLI_rec,
+ SLD_rec,
+ SRD_rec,
+ RLDIC_rec
)>;
// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
@@ -1122,7 +1136,7 @@ def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- FDIVo
+ FDIV_rec
)>;
// 36 Cycle DP Instruction.
@@ -1156,7 +1170,7 @@ def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- FSQRTo
+ FSQRT_rec
)>;
// 26 Cycle DP Instruction.
@@ -1175,7 +1189,7 @@ def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- FSQRTSo
+ FSQRTS_rec
)>;
// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
@@ -1194,7 +1208,7 @@ def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_1C],
(instrs
- FDIVSo
+ FDIVS_rec
)>;
// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
@@ -1304,6 +1318,7 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C],
BCLalways,
BCLn,
BCTRL8_LDinto_toc,
+ BCTRL_LWZinto_toc,
BCn,
CTRL_DEP
)>;
@@ -1400,7 +1415,7 @@ def : InstRW<[],
MBAR,
MSYNC,
SLBSYNC,
- SLBFEEo,
+ SLBFEE_rec,
NAP,
STOP,
TRAP,
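
P9InstrResources.td tracks the same opcode rename in its scheduling classes: every instregex swaps the old trailing "o" for "_rec", and overflow (O) forms such as DIVWO or ADD8O are now listed or matched explicitly. A hypothetical spot-check of what one updated pattern accepts, approximating instregex's front-anchoring with a prepended '^' (this harness is not part of the tree):

#include <cassert>
#include <regex>
#include <string>

static bool instregexMatches(const std::string &Pattern,
                             const std::string &Opcode) {
  // instregex patterns match from the start of the opcode name; the
  // explicit '$' in the .td patterns anchors the end.
  return std::regex_search(Opcode, std::regex("^" + Pattern));
}

int main() {
  const std::string P = "ADD(8|4)(O)?(_rec)?$";
  assert(instregexMatches(P, "ADD8"));      // base form
  assert(instregexMatches(P, "ADD4O"));     // overflow form
  assert(instregexMatches(P, "ADD8O_rec")); // overflow + record form
  assert(!instregexMatches(P, "ADD8o"));    // the pre-rename spelling
  return 0;
}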
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index 0534773c4c9e..a83509f0e687 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -28,12 +28,13 @@ namespace llvm {
class AsmPrinter;
class MCInst;
class MCOperand;
-
+ class ModulePass;
+
FunctionPass *createPPCCTRLoops();
#ifndef NDEBUG
FunctionPass *createPPCCTRLoopsVerify();
#endif
- FunctionPass *createPPCLoopPreIncPrepPass(PPCTargetMachine &TM);
+ FunctionPass *createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM);
FunctionPass *createPPCTOCRegDepsPass();
FunctionPass *createPPCEarlyReturnPass();
FunctionPass *createPPCVSXCopyPass();
@@ -59,7 +60,7 @@ namespace llvm {
#ifndef NDEBUG
void initializePPCCTRLoopsVerifyPass(PassRegistry&);
#endif
- void initializePPCLoopPreIncPrepPass(PassRegistry&);
+ void initializePPCLoopInstrFormPrepPass(PassRegistry&);
void initializePPCTOCRegDepsPass(PassRegistry&);
void initializePPCEarlyReturnPass(PassRegistry&);
void initializePPCVSXCopyPass(PassRegistry&);
@@ -77,6 +78,10 @@ namespace llvm {
extern char &PPCVSXFMAMutateID;
+ ModulePass *createPPCLowerMASSVEntriesPass();
+ void initializePPCLowerMASSVEntriesPass(PassRegistry &);
+ extern char &PPCLowerMASSVEntriesID;
+
namespace PPCII {
/// Target Operand Flag enum.
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 8e94a2ae15e0..bef0a81ee3ad 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -22,35 +22,37 @@ include "llvm/Target/Target.td"
// CPU Directives //
//===----------------------------------------------------------------------===//
-def Directive440 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_440", "">;
-def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">;
-def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">;
-def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
-def Directive604 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
-def Directive620 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
-def Directive7400: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_7400", "">;
-def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">;
-def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
-def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
-def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
-def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">;
-def DirectiveE500 : SubtargetFeature<"", "DarwinDirective",
+def Directive440 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_440", "">;
+def Directive601 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_601", "">;
+def Directive602 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_602", "">;
+def Directive603 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_603", "">;
+def Directive604 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_603", "">;
+def Directive620 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_603", "">;
+def Directive7400: SubtargetFeature<"", "CPUDirective", "PPC::DIR_7400", "">;
+def Directive750 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_750", "">;
+def Directive970 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_970", "">;
+def Directive32 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_32", "">;
+def Directive64 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_64", "">;
+def DirectiveA2 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_A2", "">;
+def DirectiveE500 : SubtargetFeature<"", "CPUDirective",
"PPC::DIR_E500", "">;
-def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective",
+def DirectiveE500mc : SubtargetFeature<"", "CPUDirective",
"PPC::DIR_E500mc", "">;
-def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective",
+def DirectiveE5500 : SubtargetFeature<"", "CPUDirective",
"PPC::DIR_E5500", "">;
-def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">;
-def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">;
-def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">;
+def DirectivePwr3: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR3", "">;
+def DirectivePwr4: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR4", "">;
+def DirectivePwr5: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR5", "">;
def DirectivePwr5x
- : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">;
-def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
+ : SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR5X", "">;
+def DirectivePwr6: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR6", "">;
def DirectivePwr6x
- : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">;
-def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
-def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">;
-def DirectivePwr9: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR9", "">;
+ : SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR6X", "">;
+def DirectivePwr7: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR7", "">;
+def DirectivePwr8: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR8", "">;
+def DirectivePwr9: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR9", "">;
+def DirectivePwrFuture
+ : SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR_FUTURE", "">;
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
"Enable 64-bit instructions">;
@@ -164,6 +166,9 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
"Enable Hardware Transactional Memory instructions">;
def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
"Implement mftb using the mfspr instruction">;
+def FeatureUnalignedFloats :
+ SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
+ "true", "CPU does not trap on unaligned FP access">;
def FeaturePPCPreRASched:
SubtargetFeature<"ppc-prera-sched", "UsePPCPreRASchedStrategy", "true",
"Use PowerPC pre-RA scheduling strategy">;
@@ -209,36 +214,95 @@ def FeatureVectorsUseTwoUnits : SubtargetFeature<"vectors-use-two-units",
// came before them, the idea is to make implementations of new processors
// less error prone and easier to read.
// Namely:
-// list<SubtargetFeature> Power8FeatureList = ...
-// list<SubtargetFeature> FutureProcessorSpecificFeatureList =
-// [ features that Power8 does not support ]
-// list<SubtargetFeature> FutureProcessorFeatureList =
-// !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList)
+// list<SubtargetFeature> P8InheritableFeatures = ...
+// list<SubtargetFeature> FutureProcessorAddtionalFeatures =
+// [ features that Power8 does not support but inheritable ]
+// list<SubtargetFeature> FutureProcessorSpecificFeatures =
+// [ features that Power8 does not support and not inheritable ]
+// list<SubtargetFeature> FutureProcessorInheritableFeatures =
+// !listconcat(P8InheritableFeatures, FutureProcessorAddtionalFeatures)
+// list<SubtargetFeature> FutureProcessorFeatures =
+// !listconcat(FutureProcessorInheritableFeatures,
+// FutureProcessorSpecificFeatures)
// Makes it explicit and obvious what is new in FutureProcesor vs. Power8 as
// well as providing a single point of definition if the feature set will be
// used elsewhere.
def ProcessorFeatures {
- list<SubtargetFeature> Power7FeatureList =
- [DirectivePwr7, FeatureAltivec, FeatureVSX,
- FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
- FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
- FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */,
- FeatureBPERMD, FeatureExtDiv,
- FeatureMFTB, DeprecatedDST, FeatureTwoConstNR];
- list<SubtargetFeature> Power8SpecificFeatures =
- [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
- FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic];
- list<SubtargetFeature> Power8FeatureList =
- !listconcat(Power7FeatureList, Power8SpecificFeatures);
- list<SubtargetFeature> Power9SpecificFeatures =
- [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0,
- FeatureVectorsUseTwoUnits, FeaturePPCPreRASched, FeaturePPCPostRASched];
- list<SubtargetFeature> Power9FeatureList =
- !listconcat(Power8FeatureList, Power9SpecificFeatures);
+ // Power7
+ list<SubtargetFeature> P7InheritableFeatures = [DirectivePwr7,
+ FeatureAltivec,
+ FeatureVSX,
+ FeatureMFOCRF,
+ FeatureFCPSGN,
+ FeatureFSqrt,
+ FeatureFRE,
+ FeatureFRES,
+ FeatureFRSQRTE,
+ FeatureFRSQRTES,
+ FeatureRecipPrec,
+ FeatureSTFIWX,
+ FeatureLFIWAX,
+ FeatureFPRND,
+ FeatureFPCVT,
+ FeatureISEL,
+ FeaturePOPCNTD,
+ FeatureCMPB,
+ FeatureLDBRX,
+ Feature64Bit,
+ /* Feature64BitRegs, */
+ FeatureBPERMD,
+ FeatureExtDiv,
+ FeatureMFTB,
+ DeprecatedDST,
+ FeatureTwoConstNR,
+ FeatureUnalignedFloats];
+ list<SubtargetFeature> P7SpecificFeatures = [];
+ list<SubtargetFeature> P7Features =
+ !listconcat(P7InheritableFeatures, P7SpecificFeatures);
+
+ // Power8
+ list<SubtargetFeature> P8AdditionalFeatures = [DirectivePwr8,
+ FeatureP8Altivec,
+ FeatureP8Vector,
+ FeatureP8Crypto,
+ FeatureHTM,
+ FeatureDirectMove,
+ FeatureICBT,
+ FeaturePartwordAtomic];
+ list<SubtargetFeature> P8SpecificFeatures = [];
+ list<SubtargetFeature> P8InheritableFeatures =
+ !listconcat(P7InheritableFeatures, P8AdditionalFeatures);
+ list<SubtargetFeature> P8Features =
+ !listconcat(P8InheritableFeatures, P8SpecificFeatures);
+
+ // Power9
+ list<SubtargetFeature> P9AdditionalFeatures = [DirectivePwr9,
+ FeatureP9Altivec,
+ FeatureP9Vector,
+ FeatureISA3_0];
+ // Some features are unique to Power9 and there is no reason to assume
+ // they will be part of any future CPUs. One example is the narrower
+ // dispatch for vector operations than scalar ones. For the time being,
+ // this list also includes scheduling-related features since we do not have
+ // enough info to create custom scheduling strategies for future CPUs.
+ list<SubtargetFeature> P9SpecificFeatures = [FeatureVectorsUseTwoUnits,
+ FeaturePPCPreRASched,
+ FeaturePPCPostRASched];
+ list<SubtargetFeature> P9InheritableFeatures =
+ !listconcat(P8InheritableFeatures, P9AdditionalFeatures);
+ list<SubtargetFeature> P9Features =
+ !listconcat(P9InheritableFeatures, P9SpecificFeatures);
+
+ // Future
+ // For future CPU we assume that all of the existing features from Power 9
+ // still exist with the exception of those we know are Power 9 specific.
+ list<SubtargetFeature> FutureAdditionalFeatures = [];
+ list<SubtargetFeature> FutureSpecificFeatures = [];
+ list<SubtargetFeature> FutureInheritableFeatures =
+ !listconcat(P9InheritableFeatures, FutureAdditionalFeatures);
+ list<SubtargetFeature> FutureFeatures =
+ !listconcat(FutureInheritableFeatures, FutureSpecificFeatures);
}
// Note: Future features to add when support is extended to more
@@ -378,7 +442,7 @@ def : ProcessorModel<"g5", G5Model,
def : ProcessorModel<"e500", PPCE500Model,
[DirectiveE500,
FeatureICBT, FeatureBookE,
- FeatureISEL, FeatureMFTB]>;
+ FeatureISEL, FeatureMFTB, FeatureSPE]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
[DirectiveE500mc,
FeatureSTFIWX, FeatureICBT, FeatureBookE,
@@ -438,9 +502,12 @@ def : ProcessorModel<"pwr6x", G5Model,
FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB,
FeatureFPRND, Feature64Bit,
FeatureMFTB, DeprecatedDST]>;
-def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>;
-def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
-def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>;
+def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>;
+def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>;
+def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>;
+// No scheduler model for future CPU.
+def : ProcessorModel<"future", NoSchedModel,
+ ProcessorFeatures.FutureFeatures>;
def : Processor<"ppc", G3Itineraries, [Directive32, FeatureHardFloat,
FeatureMFTB]>;
def : Processor<"ppc32", G3Itineraries, [Directive32, FeatureHardFloat,
@@ -451,7 +518,7 @@ def : ProcessorModel<"ppc64", G5Model,
FeatureFRSQRTE, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */,
FeatureMFTB]>;
-def : ProcessorModel<"ppc64le", P8Model, ProcessorFeatures.Power8FeatureList>;
+def : ProcessorModel<"ppc64le", P8Model, ProcessorFeatures.P8Features>;
//===----------------------------------------------------------------------===//
// Calling Conventions
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 66236b72a1a3..4311df5dbeb8 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -43,6 +43,7 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -79,9 +80,11 @@ namespace {
class PPCAsmPrinter : public AsmPrinter {
protected:
MapVector<const MCSymbol *, MCSymbol *> TOC;
- const PPCSubtarget *Subtarget;
+ const PPCSubtarget *Subtarget = nullptr;
StackMaps SM;
+ virtual MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO);
+
public:
explicit PPCAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
@@ -144,23 +147,12 @@ public:
void EmitInstruction(const MachineInstr *MI) override;
};
-/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
-/// OS X
-class PPCDarwinAsmPrinter : public PPCAsmPrinter {
-public:
- explicit PPCDarwinAsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer)
- : PPCAsmPrinter(TM, std::move(Streamer)) {}
-
- StringRef getPassName() const override {
- return "Darwin PPC Assembly Printer";
- }
-
- bool doFinalization(Module &M) override;
- void EmitStartOfAsmFile(Module &M) override;
-};
-
class PPCAIXAsmPrinter : public PPCAsmPrinter {
+private:
+ static void ValidateGV(const GlobalVariable *GV);
+protected:
+ MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO) override;
+
public:
PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
: PPCAsmPrinter(TM, std::move(Streamer)) {}
@@ -169,6 +161,8 @@ public:
void SetupMachineFunction(MachineFunction &MF) override;
+ const MCExpr *lowerConstant(const Constant *CV) override;
+
void EmitGlobalVariable(const GlobalVariable *GV) override;
void EmitFunctionDescriptor() override;
@@ -351,8 +345,12 @@ void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) {
void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) {
unsigned NumNOPBytes = MI.getOperand(1).getImm();
+
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer->EmitLabel(MILabel);
- SM.recordStackMap(MI);
+ SM.recordStackMap(*MILabel, MI);
assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
// Scan ahead to trim the shadow.
@@ -377,7 +375,11 @@ void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) {
// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>
void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
- SM.recordPatchPoint(MI);
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer->EmitLabel(MILabel);
+
+ SM.recordPatchPoint(*MILabel, MI);
PatchPointOpers Opers(&MI);
unsigned EncodedBytes = 0;
@@ -490,9 +492,9 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
(!Subtarget->isPPC64() && MI->getOperand(1).getReg() == PPC::R3)) &&
"GETtls[ld]ADDR[32] must read GPR3");
- if (!Subtarget->isPPC64() && !Subtarget->isDarwin() &&
- isPositionIndependent())
+ if (Subtarget->is32BitELFABI() && isPositionIndependent())
Kind = MCSymbolRefExpr::VK_PLT;
+
const MCExpr *TlsRef =
MCSymbolRefExpr::create(TlsGetAddr, Kind, OutContext);
@@ -514,17 +516,16 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
/// Map a machine operand for a TOC pseudo-machine instruction to its
/// corresponding MCSymbol.
-static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO,
- AsmPrinter &AP) {
+MCSymbol *PPCAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) {
switch (MO.getType()) {
case MachineOperand::MO_GlobalAddress:
- return AP.getSymbol(MO.getGlobal());
+ return getSymbol(MO.getGlobal());
case MachineOperand::MO_ConstantPoolIndex:
- return AP.GetCPISymbol(MO.getIndex());
+ return GetCPISymbol(MO.getIndex());
case MachineOperand::MO_JumpTableIndex:
- return AP.GetJTISymbol(MO.getIndex());
+ return GetJTISymbol(MO.getIndex());
case MachineOperand::MO_BlockAddress:
- return AP.GetBlockAddressSymbol(MO.getBlockAddress());
+ return GetBlockAddressSymbol(MO.getBlockAddress());
default:
llvm_unreachable("Unexpected operand type to get symbol.");
}
@@ -688,7 +689,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Invalid operand for LWZtoc.");
// Map the operand to its corresponding MCSymbol.
- const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+ const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO);
// Create a reference to the GOT entry for the symbol. The GOT entry will be
// synthesized later.
@@ -749,7 +750,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// global address operand to be a reference to the TOC entry we will
// synthesize later.
MCSymbol *TOCEntry =
- lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO, *this));
+ lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO));
const MCSymbolRefExpr::VariantKind VK =
IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC;
@@ -775,7 +776,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Invalid operand for ADDIStocHA.");
// Map the machine operand to its corresponding MCSymbol.
- MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+ MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO);
// Always use TOC on AIX. Map the global address operand to be a reference
// to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
@@ -805,7 +806,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Invalid operand for LWZtocL.");
// Map the machine operand to its corresponding MCSymbol.
- MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+ MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO);
// Always use TOC on AIX. Map the global address operand to be a reference
// to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
@@ -835,7 +836,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
"Invalid operand for ADDIStocHA8!");
- const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+ const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO);
const bool GlobalToc =
MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal());
@@ -881,7 +882,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"LDtocL used on symbol that could be accessed directly is "
"invalid. Must match ADDIStocHA8."));
- const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+ const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO);
if (!MO.isCPI() || TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
@@ -911,7 +912,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Interposable definitions must use indirect access."));
const MCExpr *Exp =
- MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO, *this),
+ MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO),
MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext);
TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
@@ -1578,151 +1579,6 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() {
}
}
-void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
- static const char *const CPUDirectives[] = {
- "",
- "ppc",
- "ppc440",
- "ppc601",
- "ppc602",
- "ppc603",
- "ppc7400",
- "ppc750",
- "ppc970",
- "ppcA2",
- "ppce500",
- "ppce500mc",
- "ppce5500",
- "power3",
- "power4",
- "power5",
- "power5x",
- "power6",
- "power6x",
- "power7",
- // FIXME: why is power8 missing here?
- "ppc64",
- "ppc64le",
- "power9"
- };
-
- // Get the numerically largest directive.
- // FIXME: How should we merge darwin directives?
- unsigned Directive = PPC::DIR_NONE;
- for (const Function &F : M) {
- const PPCSubtarget &STI = TM.getSubtarget<PPCSubtarget>(F);
- unsigned FDir = STI.getDarwinDirective();
- Directive = Directive > FDir ? FDir : STI.getDarwinDirective();
- if (STI.hasMFOCRF() && Directive < PPC::DIR_970)
- Directive = PPC::DIR_970;
- if (STI.hasAltivec() && Directive < PPC::DIR_7400)
- Directive = PPC::DIR_7400;
- if (STI.isPPC64() && Directive < PPC::DIR_64)
- Directive = PPC::DIR_64;
- }
-
- assert(Directive <= PPC::DIR_64 && "Directive out of range.");
-
- assert(Directive < array_lengthof(CPUDirectives) &&
- "CPUDirectives[] might not be up-to-date!");
- PPCTargetStreamer &TStreamer =
- *static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
- TStreamer.emitMachine(CPUDirectives[Directive]);
-
- // Prime text sections so they are adjacent. This reduces the likelihood a
- // large data or debug section causes a branch to exceed 16M limit.
- const TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
- OutStreamer->SwitchSection(TLOFMacho.getTextCoalSection());
- if (TM.getRelocationModel() == Reloc::PIC_) {
- OutStreamer->SwitchSection(
- OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
- MachO::S_SYMBOL_STUBS |
- MachO::S_ATTR_PURE_INSTRUCTIONS,
- 32, SectionKind::getText()));
- } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
- OutStreamer->SwitchSection(
- OutContext.getMachOSection("__TEXT","__symbol_stub1",
- MachO::S_SYMBOL_STUBS |
- MachO::S_ATTR_PURE_INSTRUCTIONS,
- 16, SectionKind::getText()));
- }
- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
-}
-
-bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
- bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64;
-
- // Darwin/PPC always uses mach-o.
- const TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
- if (MMI) {
- MachineModuleInfoMachO &MMIMacho =
- MMI->getObjFileInfo<MachineModuleInfoMachO>();
-
- if (MAI->doesSupportExceptionHandling()) {
- // Add the (possibly multiple) personalities to the set of global values.
- // Only referenced functions get into the Personalities list.
- for (const Function *Personality : MMI->getPersonalities()) {
- if (Personality) {
- MCSymbol *NLPSym =
- getSymbolWithGlobalValueBase(Personality, "$non_lazy_ptr");
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMIMacho.getGVStubEntry(NLPSym);
- StubSym =
- MachineModuleInfoImpl::StubValueTy(getSymbol(Personality), true);
- }
- }
- }
-
- // Output stubs for dynamically-linked functions.
- MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetGVStubList();
-
- // Output macho stubs for external and common global variables.
- if (!Stubs.empty()) {
- // Switch with ".non_lazy_symbol_pointer" directive.
- OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
- EmitAlignment(isPPC64 ? Align(8) : Align(4));
-
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- // L_foo$stub:
- OutStreamer->EmitLabel(Stubs[i].first);
- // .indirect_symbol _foo
- MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
- OutStreamer->EmitSymbolAttribute(MCSym.getPointer(),
- MCSA_IndirectSymbol);
-
- if (MCSym.getInt())
- // External to current translation unit.
- OutStreamer->EmitIntValue(0, isPPC64 ? 8 : 4 /*size*/);
- else
- // Internal to current translation unit.
- //
- // When we place the LSDA into the TEXT section, the type info
- // pointers
- // need to be indirect and pc-rel. We accomplish this by using NLPs.
- // However, sometimes the types are local to the file. So we need to
- // fill in the value for the NLP in those cases.
- OutStreamer->EmitValue(
- MCSymbolRefExpr::create(MCSym.getPointer(), OutContext),
- isPPC64 ? 8 : 4 /*size*/);
- }
-
- Stubs.clear();
- OutStreamer->AddBlankLine();
- }
- }
-
- // Funny Darwin hack: This flag tells the linker that no global symbols
- // contain code that falls through to other global symbols (e.g. the obvious
- // implementation of multiple entry points). If this doesn't occur, the
- // linker can safely perform dead code stripping. Since LLVM never generates
- // code that does this, it is always safe to set.
- OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
-
- return AsmPrinter::doFinalization(M);
-}
-
void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) {
// Get the function descriptor symbol.
CurrentFnDescSym = getSymbol(&MF.getFunction());
@@ -1735,7 +1591,7 @@ void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) {
return AsmPrinter::SetupMachineFunction(MF);
}
-void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+void PPCAIXAsmPrinter::ValidateGV(const GlobalVariable *GV) {
// Early error checking limiting what is supported.
if (GV->isThreadLocal())
report_fatal_error("Thread local not yet supported on AIX.");
@@ -1745,22 +1601,51 @@ void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (GV->hasComdat())
report_fatal_error("COMDAT not yet supported by AIX.");
+}
+
+const MCExpr *PPCAIXAsmPrinter::lowerConstant(const Constant *CV) {
+ if (const Function *F = dyn_cast<Function>(CV)) {
+ MCSymbolXCOFF *FSym = cast<MCSymbolXCOFF>(getSymbol(F));
+ if (!FSym->hasContainingCsect()) {
+ const XCOFF::StorageClass SC =
+ F->isDeclaration()
+ ? TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(F)
+ : XCOFF::C_HIDEXT;
+ MCSectionXCOFF *Csect = OutStreamer->getContext().getXCOFFSection(
+ FSym->getName(), XCOFF::XMC_DS,
+ F->isDeclaration() ? XCOFF::XTY_ER : XCOFF::XTY_SD, SC,
+ SectionKind::getData());
+ FSym->setContainingCsect(Csect);
+ }
+ return MCSymbolRefExpr::create(
+ FSym->getContainingCsect()->getQualNameSymbol(), OutContext);
+ }
+ return PPCAsmPrinter::lowerConstant(CV);
+}
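The hunk above makes a function's address in a static initializer resolve to its descriptor csect (XMC_DS) rather than its entry point, creating the csect on first use. A rough sketch of that create-once-then-reuse pattern, with invented names (Csect, descriptorFor) standing in for the MC-layer types:

#include <map>
#include <string>

// Toy stand-in for a csect record; not the MC layer.
struct Csect { std::string QualName; };

// First reference to a function materializes its descriptor csect
// ("name[DS]"); every later reference returns the cached record.
const Csect &descriptorFor(const std::string &FnName,
                           std::map<std::string, Csect> &Cache) {
  auto It = Cache.find(FnName);
  if (It == Cache.end())
    It = Cache.emplace(FnName, Csect{FnName + "[DS]"}).first;
  return It->second;
}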
+
+void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ ValidateGV(GV);
+
+ // External global variables are already handled.
+ if (!GV->hasInitializer())
+ return;
+
+ // Create the symbol, set its storage class.
+ MCSymbolXCOFF *GVSym = cast<MCSymbolXCOFF>(getSymbol(GV));
+ GVSym->setStorageClass(
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV));
SectionKind GVKind = getObjFileLowering().getKindForGlobal(GV, TM);
- if (!GVKind.isCommon() && !GVKind.isBSSLocal() && !GVKind.isData())
+ if ((!GVKind.isGlobalWriteableData() && !GVKind.isReadOnly()) ||
+ GVKind.isMergeable2ByteCString() || GVKind.isMergeable4ByteCString())
report_fatal_error("Encountered a global variable kind that is "
"not supported yet.");
// Create the containing csect and switch to it.
- MCSectionXCOFF *CSect = cast<MCSectionXCOFF>(
+ MCSectionXCOFF *Csect = cast<MCSectionXCOFF>(
getObjFileLowering().SectionForGlobal(GV, GVKind, TM));
- OutStreamer->SwitchSection(CSect);
-
- // Create the symbol, set its storage class, and emit it.
- MCSymbolXCOFF *GVSym = cast<MCSymbolXCOFF>(getSymbol(GV));
- GVSym->setStorageClass(
- TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV));
- GVSym->setContainingCsect(CSect);
+ OutStreamer->SwitchSection(Csect);
+ GVSym->setContainingCsect(Csect);
const DataLayout &DL = GV->getParent()->getDataLayout();
@@ -1771,9 +1656,10 @@ void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
if (GVKind.isBSSLocal())
- OutStreamer->EmitXCOFFLocalCommonSymbol(GVSym, Size, Align);
+ OutStreamer->EmitXCOFFLocalCommonSymbol(
+ GVSym, Size, Csect->getQualNameSymbol(), Align);
else
- OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
+ OutStreamer->EmitCommonSymbol(Csect->getQualNameSymbol(), Size, Align);
return;
}
@@ -1797,7 +1683,10 @@ void PPCAIXAsmPrinter::EmitFunctionDescriptor() {
OutStreamer->EmitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext),
PointerSize);
// Emit TOC base address.
- MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]"));
+ const MCSectionXCOFF *TOCBaseSec = OutStreamer->getContext().getXCOFFSection(
+ StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT,
+ SectionKind::getData());
+ const MCSymbol *TOCBaseSym = TOCBaseSec->getQualNameSymbol();
OutStreamer->EmitValue(MCSymbolRefExpr::create(TOCBaseSym, OutContext),
PointerSize);
// Emit a null environment pointer.
@@ -1813,15 +1702,90 @@ void PPCAIXAsmPrinter::EmitEndOfAsmFile(Module &M) {
return;
// Emit TOC base.
- MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]"));
MCSectionXCOFF *TOCBaseSection = OutStreamer->getContext().getXCOFFSection(
StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT,
SectionKind::getData());
- cast<MCSymbolXCOFF>(TOCBaseSym)->setContainingCsect(TOCBaseSection);
+  // The TOC-base always has size 0, but 4-byte alignment.
+ TOCBaseSection->setAlignment(Align(4));
// Switch to section to emit TOC base.
OutStreamer->SwitchSection(TOCBaseSection);
+
+ PPCTargetStreamer &TS =
+ static_cast<PPCTargetStreamer &>(*OutStreamer->getTargetStreamer());
+
+ for (auto &I : TOC) {
+    // Set up the csect for the current TC entry.
+ MCSectionXCOFF *TCEntry = OutStreamer->getContext().getXCOFFSection(
+ cast<MCSymbolXCOFF>(I.first)->getUnqualifiedName(), XCOFF::XMC_TC,
+ XCOFF::XTY_SD, XCOFF::C_HIDEXT, SectionKind::getData());
+ cast<MCSymbolXCOFF>(I.second)->setContainingCsect(TCEntry);
+ OutStreamer->SwitchSection(TCEntry);
+
+ OutStreamer->EmitLabel(I.second);
+ TS.emitTCEntry(*I.first);
+ }
}
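Each TOC entry collected during lowering gets its own XMC_TC csect named after the original symbol, with the entry's label emitted inside it. A schematic of the loop's shape (toy types; the directive operands shown are illustrative, not exact XCOFF assembler syntax):

#include <cstdio>
#include <utility>
#include <vector>

// (original symbol, TOC-entry label) pairs, as in the TOC map above.
using TOCMap = std::vector<std::pair<const char *, const char *>>;

void emitTOCEntries(const TOCMap &TOC) {
  for (const auto &Entry : TOC) {
    std::printf(".csect %s[TC]\n", Entry.first); // one csect per entry
    std::printf("%s:\n", Entry.second);          // the entry's label
    std::printf("\t.tc %s\n", Entry.first);      // the TC entry itself
  }
}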
+MCSymbol *
+PPCAIXAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) {
+ const GlobalObject *GO = nullptr;
+
+  // If the MO is a function or certain kinds of globals, we want to make sure
+  // to refer to the csect symbol; otherwise we can just do the default
+  // handling.
+ if (MO.getType() != MachineOperand::MO_GlobalAddress ||
+ !(GO = dyn_cast<const GlobalObject>(MO.getGlobal())))
+ return PPCAsmPrinter::getMCSymbolForTOCPseudoMO(MO);
+
+ // Do an early error check for globals we don't support. This will go away
+ // eventually.
+ const auto *GV = dyn_cast<const GlobalVariable>(GO);
+  if (GV)
+    ValidateGV(GV);
+
+ MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(getSymbol(GO));
+
+  // If the global object is a global variable without an initializer or is a
+  // declaration of a function, then XSym is an externally referenced symbol.
+  // Hence we may need to explicitly create an MCSectionXCOFF for it so that
+  // we can return its symbol later.
+ if (GO->isDeclaration()) {
+ if (!XSym->hasContainingCsect()) {
+ // Make sure the storage class is set.
+ const XCOFF::StorageClass SC =
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
+ XSym->setStorageClass(SC);
+
+ MCSectionXCOFF *Csect = OutStreamer->getContext().getXCOFFSection(
+ XSym->getName(), isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA,
+ XCOFF::XTY_ER, SC, SectionKind::getMetadata());
+ XSym->setContainingCsect(Csect);
+ }
+
+ return XSym->getContainingCsect()->getQualNameSymbol();
+ }
+
+ // Handle initialized global variables and defined functions.
+ SectionKind GOKind = getObjFileLowering().getKindForGlobal(GO, TM);
+
+ if (GOKind.isText()) {
+ // If the MO is a function, we want to make sure to refer to the function
+ // descriptor csect.
+ return OutStreamer->getContext()
+ .getXCOFFSection(XSym->getName(), XCOFF::XMC_DS, XCOFF::XTY_SD,
+ XCOFF::C_HIDEXT, SectionKind::getData())
+ ->getQualNameSymbol();
+ } else if (GOKind.isCommon() || GOKind.isBSSLocal()) {
+    // If the operand is a common symbol, we should refer to the csect symbol.
+ return cast<MCSectionXCOFF>(
+ getObjFileLowering().SectionForGlobal(GO, GOKind, TM))
+ ->getQualNameSymbol();
+ }
+
+  // Other global variables are referred to by labels inside a single csect,
+  // so refer to the label directly.
+ return getSymbol(GV);
+}
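Condensed, the symbol choice above is a four-way decision. A sketch with an invented enum (RefKind, tocRefKind) in place of the MC symbol types:

enum class RefKind { DeclCsect, DescriptorCsect, CommonCsect, Label };

// Which symbol a TOC entry should reference for a global on AIX,
// following getMCSymbolForTOCPseudoMO above.
RefKind tocRefKind(bool IsDeclaration, bool IsText, bool IsCommonOrBSSLocal) {
  if (IsDeclaration)
    return RefKind::DeclCsect;        // XTY_ER csect created on demand
  if (IsText)
    return RefKind::DescriptorCsect;  // functions go through XMC_DS
  if (IsCommonOrBSSLocal)
    return RefKind::CommonCsect;      // commons use the csect symbol
  return RefKind::Label;              // data labels inside a csect
}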
/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
/// for a MachineFunction to the given output stream, in a format that the
@@ -1830,8 +1794,6 @@ void PPCAIXAsmPrinter::EmitEndOfAsmFile(Module &M) {
static AsmPrinter *
createPPCAsmPrinterPass(TargetMachine &tm,
std::unique_ptr<MCStreamer> &&Streamer) {
- if (tm.getTargetTriple().isMacOSX())
- return new PPCDarwinAsmPrinter(tm, std::move(Streamer));
if (tm.getTargetTriple().isOSAIX())
return new PPCAIXAsmPrinter(tm, std::move(Streamer));
@@ -1839,7 +1801,7 @@ createPPCAsmPrinterPass(TargetMachine &tm,
}
// Force static initialization.
-extern "C" void LLVMInitializePowerPCAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCAsmPrinter() {
TargetRegistry::RegisterAsmPrinter(getThePPC32Target(),
createPPCAsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(getThePPC64Target(),
diff --git a/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
index d325b078979f..109b665e0d57 100644
--- a/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
+++ b/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index 2b8d9b87724f..4ce705300e1b 100644
--- a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -33,10 +33,8 @@
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/Constants.h"
@@ -47,6 +45,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -54,6 +53,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#ifndef NDEBUG
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 06a4d183e781..4c608520e265 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -782,8 +782,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
DebugLoc dl;
- bool needsCFI = MMI.hasDebugInfo() ||
- MF.getFunction().needsUnwindTableEntry();
+ bool needsCFI = MF.needsFrameMoves();
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
@@ -2442,10 +2441,6 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
}
unsigned PPCFrameLowering::getTOCSaveOffset() const {
- if (Subtarget.isAIXABI())
- // TOC save/restore is normally handled by the linker.
- // Indirect calls should hit this limitation.
- report_fatal_error("TOC save is not implemented on AIX yet.");
return TOCSaveOffset;
}
diff --git a/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 391ebcc1a143..ffaa3e05c847 100644
--- a/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -158,7 +158,7 @@ unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
// new group.
if (isLoadAfterStore(SU) && CurSlots < 6) {
unsigned Directive =
- DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
// If we're using a special group-terminating nop, then we need only one.
// FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
@@ -218,7 +218,7 @@ void PPCDispatchGroupSBHazardRecognizer::Reset() {
void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
unsigned Directive =
- DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
// If the group has now filled all of its slots, or if we're using a special
// group-terminating nop, the group is complete.
// FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 4ad6c88233fe..776ec52e2604 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -138,9 +138,9 @@ namespace {
///
class PPCDAGToDAGISel : public SelectionDAGISel {
const PPCTargetMachine &TM;
- const PPCSubtarget *PPCSubTarget;
- const PPCTargetLowering *PPCLowering;
- unsigned GlobalBaseReg;
+ const PPCSubtarget *PPCSubTarget = nullptr;
+ const PPCTargetLowering *PPCLowering = nullptr;
+ unsigned GlobalBaseReg = 0;
public:
explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
@@ -204,6 +204,7 @@ namespace {
bool tryBitfieldInsert(SDNode *N);
bool tryBitPermutation(SDNode *N);
bool tryIntCompareInGPR(SDNode *N);
+ bool tryAndWithMask(SDNode *N);
// tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
// an X-Form load instruction with the offset being a relocation coming from
@@ -309,7 +310,6 @@ namespace {
errs() << "ConstraintID: " << ConstraintID << "\n";
llvm_unreachable("Unexpected asm memory constraint");
case InlineAsm::Constraint_es:
- case InlineAsm::Constraint_i:
case InlineAsm::Constraint_m:
case InlineAsm::Constraint_o:
case InlineAsm::Constraint_Q:
@@ -512,13 +512,14 @@ static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
return isInt64Immediate(N.getNode(), Imm);
}
-static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
+static unsigned getBranchHint(unsigned PCC,
+ const FunctionLoweringInfo &FuncInfo,
const SDValue &DestMBB) {
assert(isa<BasicBlockSDNode>(DestMBB));
- if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
+ if (!FuncInfo.BPI) return PPC::BR_NO_HINT;
- const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
+ const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
const Instruction *BBTerm = BB->getTerminator();
if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
@@ -526,8 +527,8 @@ static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
const BasicBlock *TBB = BBTerm->getSuccessor(0);
const BasicBlock *FBB = BBTerm->getSuccessor(1);
- auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB);
- auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB);
+ auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
+ auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);
// We only want to handle cases which are easy to predict at static time, e.g.
// C++ throw statement, that is very likely not taken, or calling never
@@ -547,7 +548,7 @@ static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
return PPC::BR_NO_HINT;
- LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName()
+ LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
<< "::" << BB->getName() << "'\n"
<< " -> " << TBB->getName() << ": " << TProb << "\n"
<< " -> " << FBB->getName() << ": " << FProb << "\n");
@@ -1044,7 +1045,7 @@ static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
if (Use->isMachineOpcode())
return 0;
MaxTruncation =
- std::max(MaxTruncation, Use->getValueType(0).getSizeInBits());
+ std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits());
continue;
case ISD::STORE: {
if (Use->isMachineOpcode())
@@ -1399,11 +1400,14 @@ class BitPermutationSelector {
for (unsigned i = 0; i < NumValidBits; ++i)
Bits[i] = (*LHSBits)[i];
- // These bits are known to be zero.
+ // These bits are known to be zero but the AssertZext may be from a value
+ // that already has some constant zero bits (i.e. from a masking and).
for (unsigned i = NumValidBits; i < NumBits; ++i)
- Bits[i] = ValueBit((*LHSBits)[i].getValue(),
- (*LHSBits)[i].getValueBitIndex(),
- ValueBit::VariableKnownToBeZero);
+ Bits[i] = (*LHSBits)[i].hasValue()
+ ? ValueBit((*LHSBits)[i].getValue(),
+ (*LHSBits)[i].getValueBitIndex(),
+ ValueBit::VariableKnownToBeZero)
+ : ValueBit(ValueBit::ConstZero);
return std::make_pair(Interesting, &Bits);
}
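The corrected loop above distinguishes two kinds of known-zero upper bits: bits that still name a source value (so the zero fact can be tracked per value) and bits that are constant zero outright, e.g. after a masking AND. A toy classification, with an optional index standing in for the ValueBit's value link:

#include <optional>

enum class BitKind { ConstZero, VariableKnownToBeZero };

// Bits beyond the AssertZext width keep their value link only if they
// actually have one; otherwise they degrade to plain constant zeros.
BitKind classifyUpperBit(const std::optional<unsigned> &ValueIdx) {
  return ValueIdx ? BitKind::VariableKnownToBeZero : BitKind::ConstZero;
}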
@@ -1811,11 +1815,14 @@ class BitPermutationSelector {
SDValue ANDIVal, ANDISVal;
if (ANDIMask != 0)
- ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
- VRot, getI32Imm(ANDIMask, dl)), 0);
+ ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
+ VRot, getI32Imm(ANDIMask, dl)),
+ 0);
if (ANDISMask != 0)
- ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
- VRot, getI32Imm(ANDISMask, dl)), 0);
+ ANDISVal =
+ SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
+ getI32Imm(ANDISMask, dl)),
+ 0);
SDValue TotalVal;
if (!ANDIVal)
@@ -1904,11 +1911,14 @@ class BitPermutationSelector {
SDValue ANDIVal, ANDISVal;
if (ANDIMask != 0)
- ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
- Res, getI32Imm(ANDIMask, dl)), 0);
+ ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
+ Res, getI32Imm(ANDIMask, dl)),
+ 0);
if (ANDISMask != 0)
- ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
- Res, getI32Imm(ANDISMask, dl)), 0);
+ ANDISVal =
+ SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
+ getI32Imm(ANDISMask, dl)),
+ 0);
if (!ANDIVal)
Res = ANDISVal;
@@ -2181,15 +2191,16 @@ class BitPermutationSelector {
SDValue ANDIVal, ANDISVal;
if (ANDIMask != 0)
- ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
+ ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
ExtendToInt64(VRot, dl),
getI32Imm(ANDIMask, dl)),
0);
if (ANDISMask != 0)
- ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
- ExtendToInt64(VRot, dl),
- getI32Imm(ANDISMask, dl)),
- 0);
+ ANDISVal =
+ SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
+ ExtendToInt64(VRot, dl),
+ getI32Imm(ANDISMask, dl)),
+ 0);
if (!ANDIVal)
TotalVal = ANDISVal;
@@ -2330,11 +2341,16 @@ class BitPermutationSelector {
SDValue ANDIVal, ANDISVal;
if (ANDIMask != 0)
- ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
- ExtendToInt64(Res, dl), getI32Imm(ANDIMask, dl)), 0);
+ ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
+ ExtendToInt64(Res, dl),
+ getI32Imm(ANDIMask, dl)),
+ 0);
if (ANDISMask != 0)
- ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
- ExtendToInt64(Res, dl), getI32Imm(ANDISMask, dl)), 0);
+ ANDISVal =
+ SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
+ ExtendToInt64(Res, dl),
+ getI32Imm(ANDISMask, dl)),
+ 0);
if (!ANDIVal)
Res = ANDISVal;
@@ -2385,7 +2401,7 @@ class BitPermutationSelector {
SmallVector<ValueBit, 64> Bits;
- bool NeedMask;
+ bool NeedMask = false;
SmallVector<unsigned, 64> RLAmt;
SmallVector<BitGroup, 16> BitGroups;
@@ -2393,7 +2409,7 @@ class BitPermutationSelector {
DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
SmallVector<ValueRotInfo, 16> ValueRotsVec;
- SelectionDAG *CurDAG;
+ SelectionDAG *CurDAG = nullptr;
public:
BitPermutationSelector(SelectionDAG *DAG)
@@ -2623,8 +2639,9 @@ SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
assert((NewOpc != -1 || !IsBitwiseNegate) &&
"No record form available for AND8/OR8/XOR8?");
WideOp =
- SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl,
- MVT::i64, MVT::Glue, LHS, RHS), 0);
+ SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
+ dl, MVT::i64, MVT::Glue, LHS, RHS),
+ 0);
}
// Select this node to a single bit from CR0 set by the record-form node
@@ -3597,7 +3614,7 @@ SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
if (ConvOpts == SetccInGPROpts::ZExtInvert ||
ConvOpts == SetccInGPROpts::SExtInvert)
- CC = ISD::getSetCCInverse(CC, true);
+ CC = ISD::getSetCCInverse(CC, InputVT);
bool Inputs32Bit = InputVT == MVT::i32;
@@ -3832,7 +3849,11 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
}
-static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
+static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
+ const PPCSubtarget *Subtarget) {
+  // For SPE instructions, the result is in the GT bit of the CR.
+ bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
+
switch (CC) {
case ISD::SETUEQ:
case ISD::SETONE:
@@ -3841,17 +3862,23 @@ static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
llvm_unreachable("Should be lowered by legalize!");
default: llvm_unreachable("Unknown condition!");
case ISD::SETOEQ:
- case ISD::SETEQ: return PPC::PRED_EQ;
+ case ISD::SETEQ:
+ return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
case ISD::SETUNE:
- case ISD::SETNE: return PPC::PRED_NE;
+ case ISD::SETNE:
+ return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
case ISD::SETOLT:
- case ISD::SETLT: return PPC::PRED_LT;
+ case ISD::SETLT:
+ return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
case ISD::SETULE:
- case ISD::SETLE: return PPC::PRED_LE;
+ case ISD::SETLE:
+ return UseSPE ? PPC::PRED_LE : PPC::PRED_LE;
case ISD::SETOGT:
- case ISD::SETGT: return PPC::PRED_GT;
+ case ISD::SETGT:
+ return UseSPE ? PPC::PRED_GT : PPC::PRED_GT;
case ISD::SETUGE:
- case ISD::SETGE: return PPC::PRED_GE;
+ case ISD::SETGE:
+ return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
case ISD::SETO: return PPC::PRED_NU;
case ISD::SETUO: return PPC::PRED_UN;
// These two are invalid for floating point. Assume we have int.
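For SPE floating-point compares, the compare instruction itself encodes the relation and reports "relation holds" in the GT bit, so the positive conditions collapse onto GT and their negations onto LE, exactly as the switch above returns. A condensed sketch with illustrative names, not the PPC::Predicate values:

enum Pred { EQ, NE, LT, LE, GT, GE };

// Mirror of the mapping above for floating-point compares under SPE.
Pred spePredicate(Pred Generic) {
  switch (Generic) {
  case EQ: case LT: case GT:
    return GT; // "condition true" lives in the GT bit
  default:     // NE, LE, GE
    return LE; // negated conditions test its complement
  }
}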
@@ -4344,6 +4371,142 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
return true;
}
+bool PPCDAGToDAGISel::tryAndWithMask(SDNode *N) {
+ if (N->getOpcode() != ISD::AND)
+ return false;
+
+ SDLoc dl(N);
+ SDValue Val = N->getOperand(0);
+ unsigned Imm, Imm2, SH, MB, ME;
+ uint64_t Imm64;
+
+  // If this is an AND of a value rotated between 0 and 31 bits and then ANDed
+  // with a mask, emit rlwinm.
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
+ SDValue Val = N->getOperand(0).getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
+ CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ return true;
+ }
+
+  // If this is just a masked value where the input is not handled by the
+  // rotate case above, and is not a rotate-left (handled by a pattern in the
+  // .td file), emit rlwinm.
+ if (isInt32Immediate(N->getOperand(1), Imm)) {
+ if (isRunOfOnes(Imm, MB, ME) &&
+ N->getOperand(0).getOpcode() != ISD::ROTL) {
+ SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
+ CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ return true;
+ }
+ // AND X, 0 -> 0, not "rlwinm 32".
+ if (Imm == 0) {
+ ReplaceUses(SDValue(N, 0), N->getOperand(1));
+ return true;
+ }
+
+ // ISD::OR doesn't get all the bitfield insertion fun.
+ // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
+ // bitfield insert.
+ if (N->getOperand(0).getOpcode() == ISD::OR &&
+ isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
+ // The idea here is to check whether this is equivalent to:
+ // (c1 & m) | (x & ~m)
+ // where m is a run-of-ones mask. The logic here is that, for each bit in
+ // c1 and c2:
+ // - if both are 1, then the output will be 1.
+ // - if both are 0, then the output will be 0.
+ // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
+ // come from x.
+ // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
+ // be 0.
+ // If that last condition is never the case, then we can form m from the
+ // bits that are the same between c1 and c2.
+ unsigned MB, ME;
+ if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ N->getOperand(0).getOperand(1),
+ getI32Imm(0, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
+ ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
+ return true;
+ }
+ }
+ } else if (isInt64Immediate(N->getOperand(1).getNode(), Imm64)) {
+ // If this is a 64-bit zero-extension mask, emit rldicl.
+ if (isMask_64(Imm64)) {
+ MB = 64 - countTrailingOnes(Imm64);
+ SH = 0;
+
+ if (Val.getOpcode() == ISD::ANY_EXTEND) {
+ auto Op0 = Val.getOperand(0);
+      if (Op0.getOpcode() == ISD::SRL &&
+ isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
+
+ auto ResultType = Val.getNode()->getValueType(0);
+ auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
+ ResultType);
+        SDValue IDVal(ImDef, 0);
+
+ Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
+ ResultType, IDVal, Op0.getOperand(0),
+ getI32Imm(1, dl)), 0);
+ SH = 64 - Imm;
+ }
+ }
+
+ // If the operand is a logical right shift, we can fold it into this
+ // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
+ // for n <= mb. The right shift is really a left rotate followed by a
+ // mask, and this mask is a more-restrictive sub-mask of the mask implied
+ // by the shift.
+ if (Val.getOpcode() == ISD::SRL &&
+ isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
+ assert(Imm < 64 && "Illegal shift amount");
+ Val = Val.getOperand(0);
+ SH = 64 - Imm;
+ }
+
+ SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
+ CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
+ return true;
+ } else if (isMask_64(~Imm64)) {
+    // If this is a negated 64-bit zero-extension mask, i.e. the immediate
+    // is a sequence of ones from the most significant side and all zeros
+    // for the remainder, we should use rldicr.
+ MB = 63 - countTrailingOnes(~Imm64);
+ SH = 0;
+ SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
+ CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
+ return true;
+ }
+
+    // The immediate is not a 16-bit value, so a plain "and" would need at
+    // least two instructions. Try to exploit it with rotate-and-mask
+    // instructions instead.
+ if (isRunOfOnes64(Imm64, MB, ME)) {
+ if (MB >= 32 && MB <= ME) {
+ // MB ME
+ // +----------------------+
+ // |xxxxxxxxxxx00011111000|
+ // +----------------------+
+ // 0 32 64
+        // We can only do this when MB >= 32 and MB <= ME, because RLWINM8
+        // replaces the contents of bits [0, 32) with bits [32, 64) even when
+        // no rotation is performed.
+ SDValue Ops[] = { Val, getI64Imm(0, dl), getI64Imm(MB - 32, dl),
+ getI64Imm(ME - 32, dl) };
+ CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
+ return true;
+ }
+ // TODO - handle it with rldicl + rldicl
+ }
+ }
+
+ return false;
+}
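The mask tests above are plain 32/64-bit arithmetic. A self-contained sketch of two of them, with simplified helpers reimplemented here for illustration (LLVM's isRunOfOnes also accepts wrap-around runs, which this version does not):

#include <cstdint>

// A non-wrapping run of ones: strip trailing zeros, then check that the
// remaining value is a contiguous low mask (2^k - 1).
static bool isRunOfOnes32(uint32_t V) {
  if (V == 0)
    return false;
  while ((V & 1) == 0)
    V >>= 1;
  return (V & (V + 1)) == 0;
}

// (and (or x, C1), C2) is a bitfield insert (rlwimi) iff the bits where
// C1 and C2 agree form the mask and C1 never has a 1 where C2 has a 0.
static bool orAndIsBitfieldInsert(uint32_t C1, uint32_t C2) {
  return isRunOfOnes32(~(C1 ^ C2)) && !(~C2 & C1);
}

// The rldicl fold: for 1 <= N <= 63 and a low-ones mask with MB cleared
// high bits, (X >> N) & Mask equals rotl64(X, 64 - N) & Mask when N <= MB,
// because the shift's implied mask (64 - N low ones) covers the AND mask.
static uint64_t rldiclFold(uint64_t X, unsigned N, unsigned MB) {
  uint64_t Rot = (X << (64 - N)) | (X >> N); // rotate left by 64 - N
  uint64_t Mask = ~0ULL >> MB;               // clear the top MB bits
  return Rot & Mask;
}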
+
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
void PPCDAGToDAGISel::Select(SDNode *N) {
@@ -4565,121 +4728,13 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
}
- case ISD::AND: {
- unsigned Imm, Imm2, SH, MB, ME;
- uint64_t Imm64;
-
- // If this is an and of a value rotated between 0 and 31 bits and then and'd
- // with a mask, emit rlwinm
- if (isInt32Immediate(N->getOperand(1), Imm) &&
- isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
- SDValue Val = N->getOperand(0).getOperand(0);
- SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
- getI32Imm(ME, dl) };
- CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
- return;
- }
- // If this is just a masked value where the input is not handled above, and
- // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
- if (isInt32Immediate(N->getOperand(1), Imm) &&
- isRunOfOnes(Imm, MB, ME) &&
- N->getOperand(0).getOpcode() != ISD::ROTL) {
- SDValue Val = N->getOperand(0);
- SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
- getI32Imm(ME, dl) };
- CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
- return;
- }
- // If this is a 64-bit zero-extension mask, emit rldicl.
- if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
- isMask_64(Imm64)) {
- SDValue Val = N->getOperand(0);
- MB = 64 - countTrailingOnes(Imm64);
- SH = 0;
-
- if (Val.getOpcode() == ISD::ANY_EXTEND) {
- auto Op0 = Val.getOperand(0);
- if ( Op0.getOpcode() == ISD::SRL &&
- isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
-
- auto ResultType = Val.getNode()->getValueType(0);
- auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
- ResultType);
- SDValue IDVal (ImDef, 0);
-
- Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
- ResultType, IDVal, Op0.getOperand(0),
- getI32Imm(1, dl)), 0);
- SH = 64 - Imm;
- }
- }
-
- // If the operand is a logical right shift, we can fold it into this
- // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
- // for n <= mb. The right shift is really a left rotate followed by a
- // mask, and this mask is a more-restrictive sub-mask of the mask implied
- // by the shift.
- if (Val.getOpcode() == ISD::SRL &&
- isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
- assert(Imm < 64 && "Illegal shift amount");
- Val = Val.getOperand(0);
- SH = 64 - Imm;
- }
-
- SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
- CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
- return;
- }
- // If this is a negated 64-bit zero-extension mask,
- // i.e. the immediate is a sequence of ones from most significant side
- // and all zero for reminder, we should use rldicr.
- if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
- isMask_64(~Imm64)) {
- SDValue Val = N->getOperand(0);
- MB = 63 - countTrailingOnes(~Imm64);
- SH = 0;
- SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
- CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
- return;
- }
-
- // AND X, 0 -> 0, not "rlwinm 32".
- if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
- ReplaceUses(SDValue(N, 0), N->getOperand(1));
+ case ISD::AND:
+ // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
+ if (tryAndWithMask(N))
return;
- }
- // ISD::OR doesn't get all the bitfield insertion fun.
- // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
- // bitfield insert.
- if (isInt32Immediate(N->getOperand(1), Imm) &&
- N->getOperand(0).getOpcode() == ISD::OR &&
- isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
- // The idea here is to check whether this is equivalent to:
- // (c1 & m) | (x & ~m)
- // where m is a run-of-ones mask. The logic here is that, for each bit in
- // c1 and c2:
- // - if both are 1, then the output will be 1.
- // - if both are 0, then the output will be 0.
- // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
- // come from x.
- // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
- // be 0.
- // If that last condition is never the case, then we can form m from the
- // bits that are the same between c1 and c2.
- unsigned MB, ME;
- if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
- SDValue Ops[] = { N->getOperand(0).getOperand(0),
- N->getOperand(0).getOperand(1),
- getI32Imm(0, dl), getI32Imm(MB, dl),
- getI32Imm(ME, dl) };
- ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
- return;
- }
- }
// Other cases are autogenerated.
break;
- }
case ISD::OR: {
if (N->getValueType(0) == MVT::i32)
if (tryBitfieldInsert(N))
@@ -4781,24 +4836,24 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
break;
}
// FIXME: Remove this once the ANDI glue bug is fixed:
- case PPCISD::ANDIo_1_EQ_BIT:
- case PPCISD::ANDIo_1_GT_BIT: {
+ case PPCISD::ANDI_rec_1_EQ_BIT:
+ case PPCISD::ANDI_rec_1_GT_BIT: {
if (!ANDIGlueBug)
break;
EVT InVT = N->getOperand(0).getValueType();
assert((InVT == MVT::i64 || InVT == MVT::i32) &&
- "Invalid input type for ANDIo_1_EQ_BIT");
+ "Invalid input type for ANDI_rec_1_EQ_BIT");
- unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
+ unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
N->getOperand(0),
CurDAG->getTargetConstant(1, dl, InVT)),
0);
SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
- SDValue SRIdxVal =
- CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
- PPC::sub_eq : PPC::sub_gt, dl, MVT::i32);
+ SDValue SRIdxVal = CurDAG->getTargetConstant(
+ N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
+ dl, MVT::i32);
CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
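The workaround above materializes x & 1 as a record-form ANDI so that CR0 is set, then reads the answer back as an i1 by extracting one bit of CR0; which bit depends on the pseudo. A schematic of that choice, with invented names:

enum class CR0Bit { eq, gt };

// ANDI_rec_1_EQ_BIT extracts cr0.eq ((x & 1) == 0);
// ANDI_rec_1_GT_BIT extracts cr0.gt ((x & 1) > 0, i.e. the low bit itself).
CR0Bit subRegFor(bool IsEqVariant) {
  return IsEqVariant ? CR0Bit::eq : CR0Bit::gt;
}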
@@ -4889,7 +4944,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
- unsigned BROpc = getPredicateForSetCC(CC);
+ unsigned BROpc =
+ getPredicateForSetCC(CC, N->getOperand(0).getValueType(), PPCSubTarget);
unsigned SelectCCOp;
if (N->getValueType(0) == MVT::i32)
@@ -5002,7 +5058,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// Prevent PPC::PRED_* from being selected into LI.
unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
if (EnableBranchHint)
- PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3));
+ PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));
SDValue Pred = getI32Imm(PCC, dl);
SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
@@ -5012,7 +5068,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
case ISD::BR_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
- unsigned PCC = getPredicateForSetCC(CC);
+ unsigned PCC =
+ getPredicateForSetCC(CC, N->getOperand(2).getValueType(), PPCSubTarget);
if (N->getOperand(2).getValueType() == MVT::i1) {
unsigned Opc;
@@ -5045,7 +5102,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
if (EnableBranchHint)
- PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4));
+ PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));
SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
@@ -6181,8 +6238,8 @@ static bool PeepholePPC64ZExtGather(SDValue Op32,
// For ANDI and ANDIS, the higher-order bits are zero if either that is true
// of the first operand, or if the second operand is positive (so that it is
// not sign extended).
- if (Op32.getMachineOpcode() == PPC::ANDIo ||
- Op32.getMachineOpcode() == PPC::ANDISo) {
+ if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
+ Op32.getMachineOpcode() == PPC::ANDIS_rec) {
SmallPtrSet<SDNode *, 16> ToPromote1;
bool Op0OK =
PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
@@ -6304,8 +6361,12 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() {
case PPC::ORI: NewOpcode = PPC::ORI8; break;
case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
case PPC::AND: NewOpcode = PPC::AND8; break;
- case PPC::ANDIo: NewOpcode = PPC::ANDIo8; break;
- case PPC::ANDISo: NewOpcode = PPC::ANDISo8; break;
+ case PPC::ANDI_rec:
+ NewOpcode = PPC::ANDI8_rec;
+ break;
+ case PPC::ANDIS_rec:
+ NewOpcode = PPC::ANDIS8_rec;
+ break;
}
// Note: During the replacement process, the nodes will be in an
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8cf6a660b08b..60ed72e1018b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -52,6 +52,7 @@
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallSite.h"
@@ -66,6 +67,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -119,6 +121,9 @@ cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
cl::desc("enable quad precision float support on ppc"), cl::Hidden);
+static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
+cl::desc("use absolute jump tables on ppc"), cl::Hidden);
+
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
@@ -132,10 +137,6 @@ extern cl::opt<bool> ANDIGlueBug;
PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
const PPCSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
- // Use _setjmp/_longjmp instead of setjmp/longjmp.
- setUseUnderscoreSetJmp(true);
- setUseUnderscoreLongJmp(true);
-
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
bool isPPC64 = Subtarget.isPPC64();
@@ -389,6 +390,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i32, Legal);
setOperationAction(ISD::BITCAST, MVT::i64, Legal);
setOperationAction(ISD::BITCAST, MVT::f64, Legal);
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::LRINT, MVT::f64, Legal);
+ setOperationAction(ISD::LRINT, MVT::f32, Legal);
+ setOperationAction(ISD::LLRINT, MVT::f64, Legal);
+ setOperationAction(ISD::LLRINT, MVT::f32, Legal);
+ setOperationAction(ISD::LROUND, MVT::f64, Legal);
+ setOperationAction(ISD::LROUND, MVT::f32, Legal);
+ setOperationAction(ISD::LLROUND, MVT::f64, Legal);
+ setOperationAction(ISD::LLROUND, MVT::f32, Legal);
+ }
} else {
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
@@ -548,6 +559,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
+ if (Subtarget.hasVSX()) {
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
+ }
+
if (Subtarget.hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
@@ -702,6 +720,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (!Subtarget.hasP8Altivec())
setOperationAction(ISD::ABS, MVT::v2i64, Expand);
+ // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
+ if (Subtarget.hasAltivec())
+ for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
+ setOperationAction(ISD::ROTL, VT, Legal);
+ // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
+ if (Subtarget.hasP8Altivec())
+ setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
+
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
@@ -756,13 +782,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
+  // The nearbyint variants are not allowed to raise the inexact exception,
+  // so we can only code-gen them with unsafe math.
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ }
+
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::MUL, MVT::v2f64, Legal);
setOperationAction(ISD::FMA, MVT::v2f64, Legal);
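The distinction driving the unsafe-math guard a few lines up: rint-style rounding may raise the inexact floating-point exception, while nearbyint must not, so lowering nearbyint to an inexact-raising instruction is only sound when exception state is ignored. A small sketch of the observable difference (assumes a platform where FE_INEXACT is supported; strictly this also wants #pragma STDC FENV_ACCESS ON):

#include <cfenv>
#include <cmath>
#include <cstdio>

int main() {
  std::feclearexcept(FE_INEXACT);
  (void)std::rint(0.5);       // allowed to raise FE_INEXACT
  int AfterRint = std::fetestexcept(FE_INEXACT);

  std::feclearexcept(FE_INEXACT);
  (void)std::nearbyint(0.5);  // must leave FE_INEXACT untouched
  int AfterNearbyint = std::fetestexcept(FE_INEXACT);

  std::printf("rint raised: %d, nearbyint raised: %d\n",
              AfterRint != 0, AfterNearbyint != 0);
  return 0;
}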
@@ -910,12 +946,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FREM, MVT::f128, Expand);
}
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
-
+ setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
+ setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
+ setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
+ setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
}
if (Subtarget.hasP9Altivec()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
}
}
@@ -1183,7 +1230,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.isDarwin())
setPrefFunctionAlignment(Align(16));
- switch (Subtarget.getDarwinDirective()) {
+ switch (Subtarget.getCPUDirective()) {
default: break;
case PPC::DIR_970:
case PPC::DIR_A2:
@@ -1198,6 +1245,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
case PPC::DIR_PWR9:
+ case PPC::DIR_PWR_FUTURE:
setPrefLoopAlignment(Align(16));
setPrefFunctionAlignment(Align(16));
break;
@@ -1212,15 +1260,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// The Freescale cores do better with aggressive inlining of memcpy and
// friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
- if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
- Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
+ if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
+ Subtarget.getCPUDirective() == PPC::DIR_E5500) {
MaxStoresPerMemset = 32;
MaxStoresPerMemsetOptSize = 16;
MaxStoresPerMemcpy = 32;
MaxStoresPerMemcpyOptSize = 8;
MaxStoresPerMemmove = 32;
MaxStoresPerMemmoveOptSize = 8;
- } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
+ } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
// The A2 also benefits from (very) aggressive inlining of memcpy and
// friends. The overhead of the function call, even when warm, can be
// over one hundred cycles.
@@ -1294,6 +1342,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
case PPCISD::FSEL: return "PPCISD::FSEL";
+ case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";
+ case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
@@ -1314,7 +1364,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
- case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE";
case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
case PPCISD::VECSHL: return "PPCISD::VECSHL";
case PPCISD::CMPB: return "PPCISD::CMPB";
@@ -1345,8 +1394,10 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
- case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
- case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
+ case PPCISD::ANDI_rec_1_EQ_BIT:
+ return "PPCISD::ANDI_rec_1_EQ_BIT";
+ case PPCISD::ANDI_rec_1_GT_BIT:
+ return "PPCISD::ANDI_rec_1_GT_BIT";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
@@ -2699,9 +2750,9 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
const Constant *C = CP->getConstVal();
- // 64-bit SVR4 ABI code is always position-independent.
+ // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (Subtarget.is64BitELFABI()) {
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
return getTOCEntry(DAG, SDLoc(CP), GA);
@@ -2735,14 +2786,16 @@ unsigned PPCTargetLowering::getJumpTableEncoding() const {
}
bool PPCTargetLowering::isJumpTableRelative() const {
- if (Subtarget.isPPC64())
+ if (UseAbsoluteJumpTables)
+ return false;
+ if (Subtarget.isPPC64() || Subtarget.isAIXABI())
return true;
return TargetLowering::isJumpTableRelative();
}
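A relative jump table stores each case target as an offset from a table base label, so entries stay small and position-independent; the new ppc-use-absolute-jumptables option above opts back into absolute entries. A minimal sketch of the relative dispatch arithmetic:

#include <cstdint>

// Entries are 32-bit offsets from the table's own address; dispatch adds
// the selected offset back to the base to recover the absolute target.
const void *jumpTarget(const int32_t *TableBase, unsigned CaseIndex) {
  return reinterpret_cast<const char *>(TableBase) + TableBase[CaseIndex];
}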
SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
- if (!Subtarget.isPPC64())
+ if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
switch (getTargetMachine().getCodeModel()) {
@@ -2759,7 +2812,7 @@ const MCExpr *
PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI,
MCContext &Ctx) const {
- if (!Subtarget.isPPC64())
+ if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
switch (getTargetMachine().getCodeModel()) {
@@ -2775,9 +2828,9 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- // 64-bit SVR4 ABI code is always position-independent.
+ // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (Subtarget.is64BitELFABI()) {
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
return getTOCEntry(DAG, SDLoc(JT), GA);
@@ -2804,9 +2857,9 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
const BlockAddress *BA = BASDN->getBlockAddress();
- // 64-bit SVR4 ABI code is always position-independent.
+ // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual BlockAddress is stored in the TOC.
- if (Subtarget.is64BitELFABI()) {
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
return getTOCEntry(DAG, SDLoc(BASDN), GA);
@@ -3129,11 +3182,17 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
+ if (Subtarget.isAIXABI())
+ report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
+
return Op.getOperand(0);
}
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
+ if (Subtarget.isAIXABI())
+ report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
+
SDValue Chain = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
@@ -3394,15 +3453,16 @@ SDValue PPCTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+ if (Subtarget.isAIXABI())
+ return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
+ InVals);
if (Subtarget.is64BitELFABI())
return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
InVals);
- else if (Subtarget.is32BitELFABI())
+ if (Subtarget.is32BitELFABI())
return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
InVals);
- // FIXME: We are using this for both AIX and Darwin. We should add appropriate
- // AIX testing, and rename it appropriately.
return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
InVals);
}
@@ -4934,213 +4994,6 @@ static bool isFunctionGlobalAddress(SDValue Callee) {
return false;
}
-static unsigned
-PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
- SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
- bool isPatchPoint, bool hasNest,
- SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
- SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
- ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
- bool isPPC64 = Subtarget.isPPC64();
- bool isSVR4ABI = Subtarget.isSVR4ABI();
- bool is64BitELFv1ABI = isPPC64 && isSVR4ABI && !Subtarget.isELFv2ABI();
- bool isAIXABI = Subtarget.isAIXABI();
-
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
- NodeTys.push_back(MVT::Other); // Returns a chain
- NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
-
- unsigned CallOpc = PPCISD::CALL;
-
- bool needIndirectCall = true;
- if (!isSVR4ABI || !isPPC64)
- if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
- // If this is an absolute destination address, use the munged value.
- Callee = SDValue(Dest, 0);
- needIndirectCall = false;
- }
-
- // PC-relative references to external symbols should go through $stub, unless
- // we're building with the leopard linker or later, which automatically
- // synthesizes these stubs.
- const TargetMachine &TM = DAG.getTarget();
- const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
- const GlobalValue *GV = nullptr;
- if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
- GV = G->getGlobal();
- bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
- bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
-
- // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
- // every direct call is) turn it into a TargetGlobalAddress /
- // TargetExternalSymbol node so that legalize doesn't hack it.
- if (isFunctionGlobalAddress(Callee)) {
- GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
-
- // A call to a TLS address is actually an indirect call to a
- // thread-specific pointer.
- unsigned OpFlags = 0;
- if (UsePlt)
- OpFlags = PPCII::MO_PLT;
-
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
- Callee.getValueType(), 0, OpFlags);
- needIndirectCall = false;
- }
-
- if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- unsigned char OpFlags = 0;
-
- if (UsePlt)
- OpFlags = PPCII::MO_PLT;
-
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
- OpFlags);
- needIndirectCall = false;
- }
-
- if (isPatchPoint) {
- // We'll form an invalid direct call when lowering a patchpoint; the full
- // sequence for an indirect call is complicated, and many of the
- // instructions introduced might have side effects (and, thus, can't be
- // removed later). The call itself will be removed as soon as the
- // argument/return lowering is complete, so the fact that it has the wrong
- // kind of operands should not really matter.
- needIndirectCall = false;
- }
-
- if (needIndirectCall) {
- // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
- // to do the call, we can't use PPCISD::CALL.
- SDValue MTCTROps[] = {Chain, Callee, InFlag};
-
- if (is64BitELFv1ABI) {
- // Function pointers in the 64-bit SVR4 ABI do not point to the function
- // entry point, but to the function descriptor (the function entry point
- // address is part of the function descriptor though).
- // The function descriptor is a three doubleword structure with the
- // following fields: function entry point, TOC base address and
- // environment pointer.
- // Thus for a call through a function pointer, the following actions need
- // to be performed:
- // 1. Save the TOC of the caller in the TOC save area of its stack
- // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
- // 2. Load the address of the function entry point from the function
- // descriptor.
- // 3. Load the TOC of the callee from the function descriptor into r2.
- // 4. Load the environment pointer from the function descriptor into
- // r11.
- // 5. Branch to the function entry point address.
- // 6. On return of the callee, the TOC of the caller needs to be
- // restored (this is done in FinishCall()).
- //
- // The loads are scheduled at the beginning of the call sequence, and the
- // register copies are flagged together to ensure that no other
- // operations can be scheduled in between. E.g. without flagging the
- // copies together, a TOC access in the caller could be scheduled between
- // the assignment of the callee TOC and the branch to the callee, which
- // results in the TOC access going through the TOC of the callee instead
- // of going through the TOC of the caller, which leads to incorrect code.
-
- // Load the address of the function entry point from the function
- // descriptor.
- SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
- if (LDChain.getValueType() == MVT::Glue)
- LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
-
- auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
- ? (MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant)
- : MachineMemOperand::MONone;
-
- MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
- SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
- /* Alignment = */ 8, MMOFlags);
-
- // Load environment pointer into r11.
- SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
- SDValue LoadEnvPtr =
- DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
- /* Alignment = */ 8, MMOFlags);
-
- SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
- SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
- SDValue TOCPtr =
- DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
- /* Alignment = */ 8, MMOFlags);
-
- setUsesTOCBasePtr(DAG);
- SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
- InFlag);
- Chain = TOCVal.getValue(0);
- InFlag = TOCVal.getValue(1);
-
- // If the function call has an explicit 'nest' parameter, it takes the
- // place of the environment pointer.
- if (!hasNest) {
- SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
- InFlag);
-
- Chain = EnvVal.getValue(0);
- InFlag = EnvVal.getValue(1);
- }
-
- MTCTROps[0] = Chain;
- MTCTROps[1] = LoadFuncPtr;
- MTCTROps[2] = InFlag;
- }
-
- Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
- makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
- InFlag = Chain.getValue(1);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::Other);
- NodeTys.push_back(MVT::Glue);
- Ops.push_back(Chain);
- CallOpc = PPCISD::BCTRL;
- Callee.setNode(nullptr);
- // Add use of X11 (holding environment pointer)
- if (is64BitELFv1ABI && !hasNest)
- Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
- // Add CTR register as callee so a bctr can be emitted later.
- if (isTailCall)
- Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
- }
-
- // If this is a direct call, pass the chain and the callee.
- if (Callee.getNode()) {
- Ops.push_back(Chain);
- Ops.push_back(Callee);
- }
- // If this is a tail call add stack pointer delta.
- if (isTailCall)
- Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
-
- // Add argument registers to the end of the list so that they are known live
- // into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register
- // live into the call.
- // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT.
- if ((isSVR4ABI && isPPC64) || isAIXABI) {
- setUsesTOCBasePtr(DAG);
-
- // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
- // no way to mark dependencies as implicit here.
- // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
- if (!isPatchPoint)
- Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2
- : PPC::R2, PtrVT));
- }
-
- return CallOpc;
-}
-
SDValue PPCTargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -5205,30 +5058,357 @@ SDValue PPCTargetLowering::LowerCallResult(
return Chain;
}
-SDValue PPCTargetLowering::FinishCall(
- CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
- bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
- SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
- SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
- unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
- std::vector<EVT> NodeTys;
- SmallVector<SDValue, 8> Ops;
- unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
- SPDiff, isTailCall, isPatchPoint, hasNest,
- RegsToPass, Ops, NodeTys, CS, Subtarget);
+static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget, bool isPatchPoint) {
+ // PatchPoint calls are not indirect.
+ if (isPatchPoint)
+ return false;
+
+ if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))
+ return false;
+
+  // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot,
+  // because the immediate function pointer points to a descriptor instead of
+  // a function entry point. The ELFv2 ABI cannot use a BLA because the
+  // function pointer immediate points to the global entry point, while the
+  // BLA would need to jump to the local entry point (see rL211174).
+ if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
+ isBLACompatibleAddress(Callee, DAG))
+ return false;
+
+ return true;
+}
+
+static unsigned getCallOpcode(bool isIndirectCall, bool isPatchPoint,
+ bool isTailCall, const Function &Caller,
+ const SDValue &Callee,
+ const PPCSubtarget &Subtarget,
+ const TargetMachine &TM) {
+ if (isTailCall)
+ return PPCISD::TC_RETURN;
+
+ // This is a call through a function pointer.
+ if (isIndirectCall) {
+    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
+    // indirect calls. The save of the caller's TOC pointer to the stack will
+    // be inserted into the DAG as part of call lowering. The restore of the
+    // TOC pointer is modeled by using a pseudo instruction for the call
+    // opcode that represents the 2-instruction sequence of an indirect branch
+    // and link, immediately followed by a load of the TOC pointer from the
+    // stack save slot into gpr2.
+ if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
+ return PPCISD::BCTRL_LOAD_TOC;
+
+ // An indirect call that does not need a TOC restore.
+ return PPCISD::BCTRL;
+ }
+
+  // The ABIs that maintain a TOC pointer across calls need a nop immediately
+  // following the call instruction if the caller and callee may have
+  // different TOC bases. At link time, if the linker determines the calls may
+  // not share a TOC base, the call is redirected to a trampoline inserted by
+  // the linker. The trampoline will (among other things) save the caller's
+  // TOC pointer at an ABI-designated offset in the linkage area, and the
+  // linker will rewrite the nop to be a load of the TOC pointer from the
+  // linkage area into gpr2.
+ if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
+ return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
+ : PPCISD::CALL_NOP;
+
+ return PPCISD::CALL;
+}
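To make the TOC discussion above concrete, here is a sketch of the two
instruction sequences involved; the save-slot offset is an assumption (24(r1)
is the usual ELFv2 value, ELFv1 and 64-bit AIX use 40(r1)):

    // Direct call that may cross TOC bases (PPCISD::CALL_NOP):
    //   bl  callee        # linker may redirect to a trampoline
    //   nop               # linker may rewrite to: ld 2, 24(1)
    //
    // Indirect call needing a TOC restore (PPCISD::BCTRL_LOAD_TOC pseudo):
    //   mtctr 12
    //   bctrl
    //   ld  2, 24(1)      # reload the caller's TOC from its save slot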
+
+static bool isValidAIXExternalSymSDNode(StringRef SymName) {
+ return StringSwitch<bool>(SymName)
+ .Cases("__divdi3", "__fixunsdfdi", "__floatundidf", "__floatundisf",
+ "__moddi3", "__udivdi3", "__umoddi3", true)
+ .Cases("ceil", "floor", "memcpy", "memmove", "memset", "round", true)
+ .Default(false);
+}
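These names typically come from legalization rather than user code. A hedged
example (the exact libcall choice is an assumption about the target):

    // On 32-bit targets, `long long q = a / b;` legalizes to a runtime call,
    //   (call "__divdi3", i64 a, i64 b)
    // which reaches the AIX lowering as an ExternalSymbolSDNode.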
+
+static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
+ const SDLoc &dl, const PPCSubtarget &Subtarget) {
+ if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
+ if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
+ return SDValue(Dest, 0);
+
+ // Returns true if the callee is local, and false otherwise.
+ auto isLocalCallee = [&]() {
+ const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
+ const GlobalValue *GV = G ? G->getGlobal() : nullptr;
+
+ return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
+ !dyn_cast_or_null<GlobalIFunc>(GV);
+ };
+
+ // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
+ // a static relocation model causes some versions of GNU LD (2.17.50, at
+ // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
+ // built with secure-PLT.
+ bool UsePlt =
+ Subtarget.is32BitELFABI() && !isLocalCallee() &&
+ Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
+
+  // On AIX, direct function calls reference the symbol for the function's
+  // entry point, which is named by prepending a "." to the function's
+  // C-linkage name.
+ const auto getAIXFuncEntryPointSymbolSDNode =
+ [&](StringRef FuncName, bool IsDeclaration,
+ const XCOFF::StorageClass &SC) {
+ auto &Context = DAG.getMachineFunction().getMMI().getContext();
+
+ MCSymbolXCOFF *S = cast<MCSymbolXCOFF>(
+ Context.getOrCreateSymbol(Twine(".") + Twine(FuncName)));
+
+ if (IsDeclaration && !S->hasContainingCsect()) {
+          // On AIX, an undefined symbol needs to be associated with an
+ // MCSectionXCOFF to get the correct storage mapping class.
+ // In this case, XCOFF::XMC_PR.
+ MCSectionXCOFF *Sec = Context.getXCOFFSection(
+ S->getName(), XCOFF::XMC_PR, XCOFF::XTY_ER, SC,
+ SectionKind::getMetadata());
+ S->setContainingCsect(Sec);
+ }
+
+ MVT PtrVT =
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ return DAG.getMCSymbol(S, PtrVT);
+ };
+
+ if (isFunctionGlobalAddress(Callee)) {
+ const GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
+ const GlobalValue *GV = G->getGlobal();
+
+ if (!Subtarget.isAIXABI())
+ return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
+ UsePlt ? PPCII::MO_PLT : 0);
+
+ assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
+ const GlobalObject *GO = cast<GlobalObject>(GV);
+ const XCOFF::StorageClass SC =
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
+ return getAIXFuncEntryPointSymbolSDNode(GO->getName(), GO->isDeclaration(),
+ SC);
+ }
+
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *SymName = S->getSymbol();
+ if (!Subtarget.isAIXABI())
+ return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
+ UsePlt ? PPCII::MO_PLT : 0);
+
+ // If there exists a user-declared function whose name is the same as the
+ // ExternalSymbol's, then we pick up the user-declared version.
+ const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
+ if (const Function *F =
+ dyn_cast_or_null<Function>(Mod->getNamedValue(SymName))) {
+ const XCOFF::StorageClass SC =
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(F);
+ return getAIXFuncEntryPointSymbolSDNode(F->getName(), F->isDeclaration(),
+ SC);
+ }
+
+ // TODO: Remove this when the support for ExternalSymbolSDNode is complete.
+ if (isValidAIXExternalSymSDNode(SymName)) {
+ return getAIXFuncEntryPointSymbolSDNode(SymName, true, XCOFF::C_EXT);
+ }
+
+ report_fatal_error("Unexpected ExternalSymbolSDNode: " + Twine(SymName));
+ }
+
+ // No transformation needed.
+  assert(Callee.getNode() && "What, no callee?");
+ return Callee;
+}
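A minimal, self-contained sketch of the AIX entry-point naming performed by
getAIXFuncEntryPointSymbolSDNode above (the helper name here is hypothetical):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/Twine.h"
    #include <string>

    // On AIX, "foo" names the function descriptor csect, while a direct call
    // references the entry-point symbol ".foo".
    std::string entryPointName(llvm::StringRef FuncName) {
      return (llvm::Twine(".") + FuncName).str(); // "foo" -> ".foo"
    }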
+
+static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
+  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
+         "Expected a CALLSEQ_START SDNode.");
+
+  // The last value produced is the chain, except when the node has glue. If
+  // the node has glue, then the last value is the glue and the chain is the
+  // second-to-last value.
+ SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
+ if (LastValue.getValueType() != MVT::Glue)
+ return LastValue;
+
+ return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
+}
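The result layout this helper assumes, sketched for clarity:

    // CALLSEQ_START results:  { ..., Chain }        (no glue), or
    //                         { ..., Chain, Glue }  (with glue)
    // Either way, the descriptor loads in prepareDescriptorIndirectCall must
    // be chained to the Chain result, never to the Glue.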
+
+// Creates the node that moves a function's address into the count register
+// to prepare for an indirect call instruction.
+static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
+ SDValue &Glue, SDValue &Chain,
+ const SDLoc &dl) {
+ SDValue MTCTROps[] = {Chain, Callee, Glue};
+ EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
+ Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
+ makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
+ // The glue is the second value produced.
+ Glue = Chain.getValue(1);
+}
+
+static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
+ SDValue &Glue, SDValue &Chain,
+ SDValue CallSeqStart,
+ ImmutableCallSite CS, const SDLoc &dl,
+ bool hasNest,
+ const PPCSubtarget &Subtarget) {
+ // Function pointers in the 64-bit SVR4 ABI do not point to the function
+ // entry point, but to the function descriptor (the function entry point
+ // address is part of the function descriptor though).
+ // The function descriptor is a three doubleword structure with the
+ // following fields: function entry point, TOC base address and
+ // environment pointer.
+ // Thus for a call through a function pointer, the following actions need
+ // to be performed:
+ // 1. Save the TOC of the caller in the TOC save area of its stack
+ // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
+ // 2. Load the address of the function entry point from the function
+ // descriptor.
+ // 3. Load the TOC of the callee from the function descriptor into r2.
+ // 4. Load the environment pointer from the function descriptor into
+ // r11.
+ // 5. Branch to the function entry point address.
+ // 6. On return of the callee, the TOC of the caller needs to be
+ // restored (this is done in FinishCall()).
+ //
+ // The loads are scheduled at the beginning of the call sequence, and the
+ // register copies are flagged together to ensure that no other
+ // operations can be scheduled in between. E.g. without flagging the
+ // copies together, a TOC access in the caller could be scheduled between
+ // the assignment of the callee TOC and the branch to the callee, which leads
+ // to incorrect code.
+
+ // Start by loading the function address from the descriptor.
+ SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
+ auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
+ ? (MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant)
+ : MachineMemOperand::MONone;
+
+ MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
+
+ // Registers used in building the DAG.
+ const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
+ const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
+
+ // Offsets of descriptor members.
+ const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
+ const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
+
+ const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
+ const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
+
+  // One load for the function's entry point address.
+ SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
+ Alignment, MMOFlags);
+
+ // One for loading the TOC anchor for the module that contains the called
+ // function.
+ SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
+ SDValue TOCPtr =
+ DAG.getLoad(RegVT, dl, LDChain, AddTOC,
+ MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
+
+ // One for loading the environment pointer.
+ SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
+ SDValue LoadEnvPtr =
+ DAG.getLoad(RegVT, dl, LDChain, AddPtr,
+ MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
+
+ // Then copy the newly loaded TOC anchor to the TOC pointer.
+ SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
+ Chain = TOCVal.getValue(0);
+ Glue = TOCVal.getValue(1);
+
+ // If the function call has an explicit 'nest' parameter, it takes the
+ // place of the environment pointer.
+ assert((!hasNest || !Subtarget.isAIXABI()) &&
+ "Nest parameter is not supported on AIX.");
+ if (!hasNest) {
+ SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
+ Chain = EnvVal.getValue(0);
+ Glue = EnvVal.getValue(1);
+ }
+
+ // The rest of the indirect call sequence is the same as the non-descriptor
+ // DAG.
+ prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
+}
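For reference, a sketch of the 64-bit descriptor layout the offsets above walk.
The struct and field names are illustrative; the offsets match
descriptorTOCAnchorOffset() == 8 and descriptorEnvironmentPointerOffset() == 16
on 64-bit targets (32-bit AIX uses the same three fields at 4-byte strides):

    #include <cstdint>

    struct FunctionDescriptor64 {
      uint64_t EntryPoint; // offset 0:  moved to CTR for the bctrl
      uint64_t TOCAnchor;  // offset 8:  copied into the TOC register (r2)
      uint64_t EnvPointer; // offset 16: copied into r11 unless 'nest' is used
    };
    static_assert(sizeof(FunctionDescriptor64) == 24, "three doublewords");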
+
+static void
+buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
+ const SDLoc &dl, bool isTailCall, bool isVarArg,
+ bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
+ SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
+ const PPCSubtarget &Subtarget, bool isIndirect) {
+ const bool IsPPC64 = Subtarget.isPPC64();
+ // MVT for a general purpose register.
+ const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
+
+ // First operand is always the chain.
+ Ops.push_back(Chain);
+
+  // If it's a direct call, pass the callee as the second operand.
+ if (!isIndirect)
+ Ops.push_back(Callee);
+ else {
+    assert(!isPatchPoint && "Patch point calls are not indirect.");
+
+    // For the TOC-based ABIs, we have saved the TOC pointer to the linkage area
+ // on the stack (this would have been done in `LowerCall_64SVR4` or
+ // `LowerCall_AIX`). The call instruction is a pseudo instruction that
+ // represents both the indirect branch and a load that restores the TOC
+ // pointer from the linkage area. The operand for the TOC restore is an add
+ // of the TOC save offset to the stack pointer. This must be the second
+ // operand: after the chain input but before any other variadic arguments.
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
+ const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
+
+ SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
+ unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
+ SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
+ Ops.push_back(AddTOC);
+ }
+
+ // Add the register used for the environment pointer.
+ if (Subtarget.usesFunctionDescriptors() && !hasNest)
+ Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
+ RegVT));
+
+ // Add CTR register as callee so a bctr can be emitted later.
+ if (isTailCall)
+ Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
+ }
+
+ // If this is a tail call add stack pointer delta.
+ if (isTailCall)
+ Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
+ // no way to mark dependencies as implicit here.
+ // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
+ if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) && !isPatchPoint)
+ Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
- if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
+ if (isVarArg && Subtarget.is32BitELFABI())
Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
- // When performing tail call optimization the callee pops its arguments off
- // the stack. Account for this here so these bytes can be pushed back on in
- // PPCFrameLowering::eliminateCallFramePseudoInstr.
- int BytesCalleePops =
- (CallConv == CallingConv::Fast &&
- getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
-
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *Mask =
@@ -5236,8 +5416,40 @@ SDValue PPCTargetLowering::FinishCall(
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ // If the glue is valid, it is the last operand.
+ if (Glue.getNode())
+ Ops.push_back(Glue);
+}
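Under the assumptions above, the operand order for a 64-bit ELFv1 indirect,
non-tail call comes out roughly as:

    // Ops = { Chain,
    //         AddTOC,          // sp + TOC save offset (TOC restore source)
    //         X11,             // environment pointer (omitted with 'nest')
    //         <argument regs>, // one per RegsToPass pair
    //         X2,              // TOC pointer (omitted for patchpoints)
    //         <register mask>,
    //         Glue }           // only if the glue value is valid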
+
+SDValue PPCTargetLowering::FinishCall(
+ CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
+ bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
+ SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
+ unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
+
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI())
+ setUsesTOCBasePtr(DAG);
+
+ const bool isIndirect = isIndirectCall(Callee, DAG, Subtarget, isPatchPoint);
+ unsigned CallOpc = getCallOpcode(isIndirect, isPatchPoint, isTailCall,
+ DAG.getMachineFunction().getFunction(),
+ Callee, Subtarget, DAG.getTarget());
+
+ if (!isIndirect)
+ Callee = transformCallee(Callee, DAG, dl, Subtarget);
+ else if (Subtarget.usesFunctionDescriptors())
+ prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CS,
+ dl, hasNest, Subtarget);
+ else
+ prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
+
+ // Build the operand list for the call instruction.
+ SmallVector<SDValue, 8> Ops;
+ buildCallOperands(Ops, CallConv, dl, isTailCall, isVarArg, isPatchPoint,
+ hasNest, DAG, RegsToPass, Glue, Chain, Callee, SPDiff,
+ Subtarget, isIndirect);
// Emit tail call.
if (isTailCall) {
@@ -5246,81 +5458,32 @@ SDValue PPCTargetLowering::FinishCall(
Callee.getOpcode() == ISD::TargetExternalSymbol ||
Callee.getOpcode() == ISD::TargetGlobalAddress ||
isa<ConstantSDNode>(Callee)) &&
- "Expecting an global address, external symbol, absolute value or register");
-
+ "Expecting a global address, external symbol, absolute value or "
+ "register");
+ assert(CallOpc == PPCISD::TC_RETURN &&
+ "Unexpected call opcode for a tail call.");
DAG.getMachineFunction().getFrameInfo().setHasTailCall();
- return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
+ return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
}
- // Add a NOP immediately after the branch instruction when using the 64-bit
- // SVR4 or the AIX ABI.
- // At link time, if caller and callee are in a different module and
- // thus have a different TOC, the call will be replaced with a call to a stub
- // function which saves the current TOC, loads the TOC of the callee and
- // branches to the callee. The NOP will be replaced with a load instruction
- // which restores the TOC of the caller from the TOC save slot of the current
- // stack frame. If caller and callee belong to the same module (and have the
- // same TOC), the NOP will remain unchanged, or become some other NOP.
-
- MachineFunction &MF = DAG.getMachineFunction();
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- if (!isTailCall && !isPatchPoint &&
- ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) ||
- Subtarget.isAIXABI())) {
- if (CallOpc == PPCISD::BCTRL) {
- if (Subtarget.isAIXABI())
- report_fatal_error("Indirect call on AIX is not implemented.");
-
- // This is a call through a function pointer.
- // Restore the caller TOC from the save area into R2.
- // See PrepareCall() for more information about calls through function
- // pointers in the 64-bit SVR4 ABI.
- // We are using a target-specific load with r2 hard coded, because the
- // result of a target-independent load would never go directly into r2,
- // since r2 is a reserved register (which prevents the register allocator
- // from allocating it), resulting in an additional register being
- // allocated and an unnecessary move instruction being generated.
- CallOpc = PPCISD::BCTRL_LOAD_TOC;
-
- SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
- unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
- SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
- SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
+ std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
+ Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
+ Glue = Chain.getValue(1);
- // The address needs to go after the chain input but before the flag (or
- // any other variadic arguments).
- Ops.insert(std::next(Ops.begin()), AddTOC);
- } else if (CallOpc == PPCISD::CALL &&
- !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) {
- // Otherwise insert NOP for non-local calls.
- CallOpc = PPCISD::CALL_NOP;
- }
- }
-
- if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) {
- // On AIX, direct function calls reference the symbol for the function's
- // entry point, which is named by inserting a "." before the function's
- // C-linkage name.
- GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
- auto &Context = DAG.getMachineFunction().getMMI().getContext();
- MCSymbol *S = Context.getOrCreateSymbol(Twine(".") +
- Twine(G->getGlobal()->getName()));
- Callee = DAG.getMCSymbol(S, PtrVT);
- // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode.
- Ops[1] = Callee;
- }
-
- Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
- InFlag = Chain.getValue(1);
+ // When performing tail call optimization the callee pops its arguments off
+ // the stack. Account for this here so these bytes can be pushed back on in
+ // PPCFrameLowering::eliminateCallFramePseudoInstr.
+ int BytesCalleePops = (CallConv == CallingConv::Fast &&
+ getTargetMachine().Options.GuaranteedTailCallOpt)
+ ? NumBytes
+ : 0;
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
DAG.getIntPtrConstant(BytesCalleePops, dl, true),
- InFlag, dl);
- if (!Ins.empty())
- InFlag = Chain.getValue(1);
+ Glue, dl);
+ Glue = Chain.getValue(1);
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
- Ins, dl, DAG, InVals);
+ return LowerCallResult(Chain, Glue, CallConv, isVarArg, Ins, dl, DAG, InVals);
}
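Taken together, the refactored pipeline can be summarized as follows (a
reading aid, not code from the patch):

    // FinishCall, roughly:
    //   isIndirectCall    -> classify the callee
    //   getCallOpcode     -> TC_RETURN / BCTRL[_LOAD_TOC] / CALL[_NOP]
    //   transformCallee or prepare[Descriptor]IndirectCall
    //   buildCallOperands -> Ops
    //   emit the call node, then CALLSEQ_END, then LowerCallResult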
SDValue
@@ -6273,8 +6436,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Check if this is an indirect call (MTCTR/BCTRL).
- // See PrepareCall() for more information about calls through function
- // pointers in the 64-bit SVR4 ABI.
+ // See prepareDescriptorIndirectCall and buildCallOperands for more
+ // information about calls through function pointers in the 64-bit SVR4 ABI.
if (!isTailCall && !isPatchPoint &&
!isFunctionGlobalAddress(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) {
@@ -6695,6 +6858,205 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
NumBytes, Ins, InVals, CS);
}
+static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State) {
+
+ if (ValVT == MVT::f128)
+ report_fatal_error("f128 is unimplemented on AIX.");
+
+ if (ArgFlags.isByVal())
+ report_fatal_error("Passing structure by value is unimplemented.");
+
+ if (ArgFlags.isNest())
+ report_fatal_error("Nest arguments are unimplemented.");
+
+ if (ValVT.isVector() || LocVT.isVector())
+ report_fatal_error("Vector arguments are unimplemented on AIX.");
+
+ const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
+ State.getMachineFunction().getSubtarget());
+ const bool IsPPC64 = Subtarget.isPPC64();
+ const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
+
+ static const MCPhysReg GPR_32[] = {// 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10};
+ static const MCPhysReg GPR_64[] = {// 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10};
+
+  // Arguments always reserve space in the parameter save area.
+ switch (ValVT.SimpleTy) {
+ default:
+ report_fatal_error("Unhandled value type for argument.");
+ case MVT::i64:
+ // i64 arguments should have been split to i32 for PPC32.
+ assert(IsPPC64 && "PPC32 should have split i64 values.");
+ LLVM_FALLTHROUGH;
+ case MVT::i1:
+ case MVT::i32:
+ State.AllocateStack(PtrByteSize, PtrByteSize);
+ if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
+ MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
+ // Promote integers if needed.
+ if (ValVT.getSizeInBits() < RegVT.getSizeInBits())
+ LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
+ : CCValAssign::LocInfo::ZExt;
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
+ }
+ else
+ report_fatal_error("Handling of placing parameters on the stack is "
+ "unimplemented!");
+ return false;
+
+ case MVT::f32:
+ case MVT::f64: {
+    // The parameter save area (PSA) is reserved even if the float is
+    // passed in an FPR.
+ const unsigned StoreSize = LocVT.getStoreSize();
+ // Floats are always 4-byte aligned in the PSA on AIX.
+ // This includes f64 in 64-bit mode for ABI compatibility.
+ State.AllocateStack(IsPPC64 ? 8 : StoreSize, 4);
+ if (unsigned Reg = State.AllocateReg(FPR))
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ report_fatal_error("Handling of placing parameters on the stack is "
+ "unimplemented!");
+
+ // AIX requires that GPRs are reserved for float arguments.
+ // Successfully reserved GPRs are only initialized for vararg calls.
+ MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
+ for (unsigned I = 0; I < StoreSize; I += PtrByteSize) {
+ if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
+ if (State.isVarArg()) {
+ // Custom handling is required for:
+ // f64 in PPC32 needs to be split into 2 GPRs.
+ // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
+ State.addLoc(
+ CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
+ }
+ } else if (State.isVarArg()) {
+ report_fatal_error("Handling of placing parameters on the stack is "
+ "unimplemented!");
+ }
+ }
+
+ return false;
+ }
+ }
+ return true;
+}
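A worked example of the allocation above (32-bit AIX, non-vararg; offsets are
relative to the start of the parameter save area):

    // void f(int a, double d, int b);
    //   a -> R3  (PSA bytes 0-3)
    //   d -> F1  (PSA bytes 4-11; R4/R5 shadowed but left uninitialized
    //             because the call is not vararg)
    //   b -> R6  (PSA bytes 12-15)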
+
+static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
+ bool IsPPC64) {
+ assert((IsPPC64 || SVT != MVT::i64) &&
+ "i64 should have been split for 32-bit codegen.");
+
+ switch (SVT) {
+ default:
+ report_fatal_error("Unexpected value type for formal argument");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::i64:
+ return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ case MVT::f32:
+ return &PPC::F4RCRegClass;
+ case MVT::f64:
+ return &PPC::F8RCRegClass;
+ }
+}
+
+static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
+ SelectionDAG &DAG, SDValue ArgValue,
+ MVT LocVT, const SDLoc &dl) {
+ assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
+ assert(ValVT.getSizeInBits() < LocVT.getSizeInBits());
+
+ if (Flags.isSExt())
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
+ DAG.getValueType(ValVT));
+ else if (Flags.isZExt())
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
+ DAG.getValueType(ValVT));
+
+ return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
+}
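For instance (value names invented), an i32 formal that arrived sign-extended
in a 64-bit GPR becomes:

    // ArgValue: i64 = CopyFromReg ...
    // t1:       i64 = AssertSext ArgValue, ValueType:i32
    // t2:       i32 = truncate t1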
+
+SDValue PPCTargetLowering::LowerFormalArguments_AIX(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+
+ assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
+ CallConv == CallingConv::Fast) &&
+ "Unexpected calling convention!");
+
+ if (isVarArg)
+ report_fatal_error("This call type is unimplemented on AIX.");
+
+ if (getTargetMachine().Options.GuaranteedTailCallOpt)
+ report_fatal_error("Tail call support is unimplemented on AIX.");
+
+ if (useSoftFloat())
+ report_fatal_error("Soft float support is unimplemented on AIX.");
+
+ const PPCSubtarget &Subtarget =
+ static_cast<const PPCSubtarget &>(DAG.getSubtarget());
+ if (Subtarget.hasQPX())
+    report_fatal_error("QPX is not supported on AIX.");
+
+ const bool IsPPC64 = Subtarget.isPPC64();
+ const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ MachineFunction &MF = DAG.getMachineFunction();
+ CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
+
+ // Reserve space for the linkage area on the stack.
+ const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+ // On AIX a minimum of 8 words is saved to the parameter save area.
+ const unsigned MinParameterSaveArea = 8 * PtrByteSize;
+ CCInfo.AllocateStack(LinkageSize + MinParameterSaveArea, PtrByteSize);
+ CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue ArgValue;
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (VA.isRegLoc()) {
+ EVT ValVT = VA.getValVT();
+ MVT LocVT = VA.getLocVT();
+ MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
+ unsigned VReg =
+ MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
+ ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
+ if (ValVT.isScalarInteger() &&
+ (ValVT.getSizeInBits() < LocVT.getSizeInBits())) {
+ ArgValue =
+ truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
+ }
+ InVals.push_back(ArgValue);
+ } else {
+ report_fatal_error("Handling of formal arguments on the stack is "
+ "unimplemented!");
+ }
+ }
+
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea = CCInfo.getNextStackOffset();
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized function's reserved stack space needs to be aligned so
+ // that taking the difference between two stack areas will result in an
+ // aligned stack.
+ MinReservedArea =
+ EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setMinReservedArea(MinReservedArea);
+
+ return Chain;
+}
SDValue PPCTargetLowering::LowerCall_AIX(
SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
@@ -6705,22 +7067,33 @@ SDValue PPCTargetLowering::LowerCall_AIX(
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
ImmutableCallSite CS) const {
- assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) &&
- "Unimplemented calling convention!");
- if (isVarArg || isPatchPoint)
+ assert((CallConv == CallingConv::C ||
+ CallConv == CallingConv::Cold ||
+ CallConv == CallingConv::Fast) && "Unexpected calling convention!");
+
+ if (isPatchPoint)
report_fatal_error("This call type is unimplemented on AIX.");
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- bool isPPC64 = PtrVT == MVT::i64;
- unsigned PtrByteSize = isPPC64 ? 8 : 4;
- unsigned NumOps = Outs.size();
+  const PPCSubtarget &Subtarget =
+      static_cast<const PPCSubtarget &>(DAG.getSubtarget());
+ if (Subtarget.hasQPX())
+ report_fatal_error("QPX is not supported on AIX.");
+ if (Subtarget.hasAltivec())
+ report_fatal_error("Altivec support is unimplemented on AIX.");
+ MachineFunction &MF = DAG.getMachineFunction();
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
- // Count how many bytes are to be pushed on the stack, including the linkage
- // area, parameter list area.
- // On XCOFF, we start with 24/48, which is reserved space for
- // [SP][CR][LR][2 x reserved][TOC].
- unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+ // Reserve space for the linkage save area (LSA) on the stack.
+ // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
+ // [SP][CR][LR][2 x reserved][TOC].
+ // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
+ const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+ const bool IsPPC64 = Subtarget.isPPC64();
+ const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
+ CCInfo.AllocateStack(LinkageSize, PtrByteSize);
+ CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
// The prolog code of the callee may store up to 8 GPR argument registers to
// the stack, allowing va_start to index over them in memory if the callee
@@ -6728,98 +7101,101 @@ SDValue PPCTargetLowering::LowerCall_AIX(
// Because we cannot tell if this is needed on the caller side, we have to
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
- unsigned NumBytes = LinkageSize + 8 * PtrByteSize;
+ const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
+ const unsigned NumBytes = LinkageSize + MinParameterSaveAreaSize;
// Adjust the stack pointer for the new arguments...
- // These operations are automatically eliminated by the prolog/epilog
- // inserter pass.
+ // These operations are automatically eliminated by the prolog/epilog pass.
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
SDValue CallSeqStart = Chain;
- static const MCPhysReg GPR_32[] = { // 32-bit registers.
- PPC::R3, PPC::R4, PPC::R5, PPC::R6,
- PPC::R7, PPC::R8, PPC::R9, PPC::R10
- };
- static const MCPhysReg GPR_64[] = { // 64-bit registers.
- PPC::X3, PPC::X4, PPC::X5, PPC::X6,
- PPC::X7, PPC::X8, PPC::X9, PPC::X10
- };
-
- const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64)
- : array_lengthof(GPR_32);
- const unsigned NumFPRs = array_lengthof(FPR);
- assert(NumFPRs == 13 && "Only FPR 1-13 could be used for parameter passing "
- "on AIX");
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
- const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
- unsigned GPR_idx = 0, FPR_idx = 0;
+ for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
+ CCValAssign &VA = ArgLocs[I++];
- SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ if (VA.isMemLoc())
+ report_fatal_error("Handling of placing parameters on the stack is "
+ "unimplemented!");
+ if (!VA.isRegLoc())
+ report_fatal_error(
+ "Unexpected non-register location for function call argument.");
- if (isTailCall)
- report_fatal_error("Handling of tail call is unimplemented!");
- int SPDiff = 0;
+ SDValue Arg = OutVals[VA.getValNo()];
- for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = OutVals[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (!VA.needsCustom()) {
+ switch (VA.getLocInfo()) {
+ default:
+ report_fatal_error("Unexpected argument extension type.");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- // Promote integers if needed.
- if (Arg.getValueType() == MVT::i1 ||
- (isPPC64 && Arg.getValueType() == MVT::i32)) {
- unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- Arg = DAG.getNode(ExtOp, dl, PtrVT, Arg);
+ continue;
}
- // Note: "by value" is code for passing a structure by value, not
- // basic types.
- if (Flags.isByVal())
- report_fatal_error("Passing structure by value is unimplemented!");
+ // Custom handling is used for GPR initializations for vararg float
+ // arguments.
+ assert(isVarArg && VA.getValVT().isFloatingPoint() &&
+ VA.getLocVT().isInteger() &&
+ "Unexpected custom register handling for calling convention.");
- switch (Arg.getSimpleValueType().SimpleTy) {
- default: llvm_unreachable("Unexpected ValueType for argument!");
- case MVT::i1:
- case MVT::i32:
- case MVT::i64:
- if (GPR_idx != NumGPRs)
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
- else
- report_fatal_error("Handling of placing parameters on the stack is "
- "unimplemented!");
- break;
- case MVT::f32:
- case MVT::f64:
- if (FPR_idx != NumFPRs) {
- RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+ SDValue ArgAsInt =
+ DAG.getBitcast(MVT::getIntegerVT(VA.getValVT().getSizeInBits()), Arg);
- // If we have any FPRs remaining, we may also have GPRs remaining.
- // Args passed in FPRs consume 1 or 2 (f64 in 32 bit mode) available
- // GPRs.
- if (GPR_idx != NumGPRs)
- ++GPR_idx;
- if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64)
- ++GPR_idx;
- } else
- report_fatal_error("Handling of placing parameters on the stack is "
- "unimplemented!");
- break;
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- case MVT::v2f64:
- case MVT::v2i64:
- case MVT::v1i128:
- case MVT::f128:
- case MVT::v4f64:
- case MVT::v4i1:
- report_fatal_error("Handling of this parameter type is unimplemented!");
- }
- }
+ if (Arg.getValueType().getStoreSize() == VA.getLocVT().getStoreSize())
+ // f32 in 32-bit GPR
+ // f64 in 64-bit GPR
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
+ else if (Arg.getValueType().getSizeInBits() < VA.getLocVT().getSizeInBits())
+ // f32 in 64-bit GPR.
+ RegsToPass.push_back(std::make_pair(
+ VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, VA.getLocVT())));
+ else {
+ // f64 in two 32-bit GPRs
+ // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
+ assert(Arg.getValueType() == MVT::f64 && isVarArg && !IsPPC64 &&
+ "Unexpected custom register for argument!");
+ CCValAssign &GPR1 = VA;
+ SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
+ DAG.getConstant(32, dl, MVT::i8));
+ RegsToPass.push_back(std::make_pair(
+ GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
+ assert(I != E && "A second custom GPR is expected!");
+ CCValAssign &GPR2 = ArgLocs[I++];
+ assert(GPR2.isRegLoc() && GPR2.getValNo() == GPR1.getValNo() &&
+ GPR2.needsCustom() && "A second custom GPR is expected!");
+ RegsToPass.push_back(std::make_pair(
+ GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
+ }
+ }
+
+ // For indirect calls, we need to save the TOC base to the stack for
+ // restoration after the call.
+ if (!isTailCall && !isPatchPoint &&
+ !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee)) {
+ const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
+ const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
+ const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
+ const unsigned TOCSaveOffset =
+ Subtarget.getFrameLowering()->getTOCSaveOffset();
- if (!isFunctionGlobalAddress(Callee) &&
- !isa<ExternalSymbolSDNode>(Callee))
- report_fatal_error("Handling of indirect call is unimplemented!");
+ setUsesTOCBasePtr(DAG);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
+ SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
+ SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ Chain = DAG.getStore(
+ Val.getValue(1), dl, Val, AddPtr,
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
+ }
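On 64-bit AIX the store above amounts to the following (offset 40 follows from
the linkage-area layout sketched earlier; 32-bit AIX would use offset 20):

    //   std 2, 40(1)   # save the caller's TOC before mtctr/bctrl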
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
@@ -6829,10 +7205,11 @@ SDValue PPCTargetLowering::LowerCall_AIX(
InFlag = Chain.getValue(1);
}
+ const int SPDiff = 0;
return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
- /* unused except on PPC64 ELFv1 */ false, DAG,
- RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
- NumBytes, Ins, InVals, CS);
+ /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass,
+ InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins,
+ InVals, CS);
}
bool
@@ -7121,8 +7498,7 @@ SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
"Custom lowering only for i1 results");
SDLoc DL(Op);
- return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
- Op.getOperand(0));
+ return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
}
SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
@@ -7188,17 +7564,15 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
!Op.getOperand(2).getValueType().isFloatingPoint())
return Op;
+ bool HasNoInfs = DAG.getTarget().Options.NoInfsFPMath;
+ bool HasNoNaNs = DAG.getTarget().Options.NoNaNsFPMath;
// We might be able to do better than this under some circumstances, but in
// general, fsel-based lowering of select is a finite-math-only optimization.
// For more information, see section F.3 of the 2.06 ISA specification.
- if (!DAG.getTarget().Options.NoInfsFPMath ||
- !DAG.getTarget().Options.NoNaNsFPMath)
+ // With ISA 3.0, we have xsmaxcdp/xsmincdp which are OK to emit even in the
+ // presence of infinities.
+ if (!Subtarget.hasP9Vector() && (!HasNoInfs || !HasNoNaNs))
return Op;
- // TODO: Propagate flags from the select rather than global settings.
- SDNodeFlags Flags;
- Flags.setNoInfs(true);
- Flags.setNoNaNs(true);
-
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
EVT ResVT = Op.getValueType();
@@ -7207,6 +7581,27 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
SDLoc dl(Op);
+ if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
+ switch (CC) {
+ default:
+ // Not a min/max but with finite math, we may still be able to use fsel.
+ if (HasNoInfs && HasNoNaNs)
+ break;
+ return Op;
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
+ }
+ }
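A hedged example of the new Power9 fast path:

    // C source:      double m = a > b ? a : b;
    // SelectionDAG:  (select_cc f64:a, f64:b, a, b, setgt)
    // Lowers to:     XSMAXCDP a, b
    // The type-C min/max instructions match the select's semantics exactly,
    // so no fast-math flags are required.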
+
+ // TODO: Propagate flags from the select rather than global settings.
+ SDNodeFlags Flags;
+ Flags.setNoInfs(true);
+ Flags.setNoNaNs(true);
+
// If the RHS of the comparison is a 0.0, we don't need to do the
// subtraction at all.
SDValue Sel1;
@@ -8055,8 +8450,6 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
/// SplatSize. Cast the result to VT.
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
SelectionDAG &DAG, const SDLoc &dl) {
- assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
-
static const MVT VTys[] = { // canonical VT to use for each size.
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
};
@@ -8376,29 +8769,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
// We have XXSPLTIB for constant splats one byte wide
- if (Subtarget.hasP9Vector() && SplatSize == 1) {
- // This is a splat of 1-byte elements with some elements potentially undef.
- // Rather than trying to match undef in the SDAG patterns, ensure that all
- // elements are the same constant.
- if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
- SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
- dl, MVT::i32));
- SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
- if (Op.getValueType() != MVT::v16i8)
- return DAG.getBitcast(Op.getValueType(), NewBV);
- return NewBV;
- }
-
- // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll
- // detect that constant splats like v8i16: 0xABAB are really just splats
- // of a 1-byte constant. In this case, we need to convert the node to a
- // splat of v16i8 and a bitcast.
- if (Op.getValueType() != MVT::v16i8)
- return DAG.getBitcast(Op.getValueType(),
- DAG.getConstant(SplatBits, dl, MVT::v16i8));
-
- return Op;
- }
+  // FIXME: SplatBits is an unsigned int being cast to an int while passing it
+  // as an argument to BuildSplatI. Given SplatSize == 1, it is okay here.
+ if (Subtarget.hasP9Vector() && SplatSize == 1)
+ return BuildSplatI(SplatBits, SplatSize, Op.getValueType(), DAG, dl);
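For example (a sketch): a v8i16 build_vector splatting 0xABAB is recognized by
isConstantSplat as a 1-byte splat, so the call above produces a v16i8 splat
that selects to XXSPLTIB, plus a bitcast back to the original type:

    // (build_vector v8i16 <0xABAB x 8>)
    //   -> (bitcast v8i16 (splat v16i8 <0xAB x 16>))   // xxspltib 171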
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
@@ -8930,19 +9304,19 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (Subtarget.hasP9Vector()) {
if (PPC::isXXBRHShuffleMask(SVOp)) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
- SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
+ SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
} else if (PPC::isXXBRWShuffleMask(SVOp)) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
- SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
+ SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
} else if (PPC::isXXBRDShuffleMask(SVOp)) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
- SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
+ SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
} else if (PPC::isXXBRQShuffleMask(SVOp)) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
- SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
+ SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
}
}
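The equivalence relied on above, sketched for the halfword case: reversing the
bytes of each v8i16 element is exactly ISD::BSWAP at v8i16, which selects to
xxbrh (and likewise xxbrw/xxbrd/xxbrq at wider element types):

    // v8i16 bytes:       [b0 b1 | b2 b3 | ... | b14 b15]
    // after bswap v8i16: [b1 b0 | b3 b2 | ... | b15 b14]   == xxbrh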
@@ -9503,7 +9877,7 @@ SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
Op.getOperand(0));
// XXBRD
- Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op);
+ Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
// MFVSRD
int VectorIndex = 0;
if (Subtarget.isLittleEndian())
@@ -10845,9 +11219,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
DebugLoc dl = MI.getDebugLoc();
TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
- } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
- MI.getOpcode() == PPC::SELECT_CC_I8 ||
- MI.getOpcode() == PPC::SELECT_CC_F4 ||
+ } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
MI.getOpcode() == PPC::SELECT_CC_F8 ||
MI.getOpcode() == PPC::SELECT_CC_F16 ||
MI.getOpcode() == PPC::SELECT_CC_QFRC ||
@@ -10859,8 +11231,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == PPC::SELECT_CC_VSRC ||
MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
MI.getOpcode() == PPC::SELECT_CC_SPE ||
- MI.getOpcode() == PPC::SELECT_I4 ||
- MI.getOpcode() == PPC::SELECT_I8 ||
MI.getOpcode() == PPC::SELECT_F4 ||
MI.getOpcode() == PPC::SELECT_F8 ||
MI.getOpcode() == PPC::SELECT_F16 ||
@@ -11397,28 +11767,28 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// Restore FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
- } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
- MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
- MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
- MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
- unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
- MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
- ? PPC::ANDIo8
- : PPC::ANDIo;
- bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
- MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
+ } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
+ MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
+ MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
+ MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
+ unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
+ MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
+ ? PPC::ANDI8_rec
+ : PPC::ANDI_rec;
+ bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
+ MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
MachineRegisterInfo &RegInfo = F->getRegInfo();
Register Dest = RegInfo.createVirtualRegister(
- Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
+ Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
- DebugLoc dl = MI.getDebugLoc();
- BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
+ DebugLoc Dl = MI.getDebugLoc();
+ BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
.addReg(MI.getOperand(1).getReg())
.addImm(1);
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
+ BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
MI.getOperand(0).getReg())
- .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
+ .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
} else if (MI.getOpcode() == PPC::TCHECK_RET) {
DebugLoc Dl = MI.getDebugLoc();
MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -11638,7 +12008,7 @@ unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal if there are two or more FDIVs (for embedded cores with only
// one FP pipeline) for three or more FDIVs (for generic OOO cores).
- switch (Subtarget.getDarwinDirective()) {
+ switch (Subtarget.getCPUDirective()) {
default:
return 3;
case PPC::DIR_440:
@@ -14111,7 +14481,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
- switch (Subtarget.getDarwinDirective()) {
+ switch (Subtarget.getCPUDirective()) {
default: break;
case PPC::DIR_970:
case PPC::DIR_PWR4:
@@ -14121,7 +14491,8 @@ Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
case PPC::DIR_PWR6X:
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
- case PPC::DIR_PWR9: {
+ case PPC::DIR_PWR9:
+ case PPC::DIR_PWR_FUTURE: {
if (!ML)
break;
@@ -14309,6 +14680,17 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &PPC::VSFRCRegClass);
}
+ // If we name a VSX register, we can't defer to the base class because it
+ // will not recognize the correct register (their names will be VSL{0-31}
+ // and V{0-31} so they won't match). So we match them here.
+ if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
+ int VSNum = atoi(Constraint.data() + 3);
+ assert(VSNum >= 0 && VSNum <= 63 &&
+ "Attempted to access a vsr out of range");
+ if (VSNum < 32)
+ return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
+ return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
+ }
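A hedged walk-through of the mapping above for the constraint string "{vs34}":
Constraint[1] == 'v' and Constraint[2] == 's', atoi("34}") yields 34, and the
result is PPC::V0 + 2, since VSX registers 32-63 overlap the Altivec V0-V31:

    // "{vs3}"  -> VSNum = 3  -> PPC::VSL0 + 3  (low VSX half)
    // "{vs34}" -> VSNum = 34 -> PPC::V0 + 2    (aliases Altivec V2)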
std::pair<unsigned, const TargetRegisterClass *> R =
TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
@@ -14513,16 +14895,15 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-Register PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
bool isPPC64 = Subtarget.isPPC64();
bool IsDarwinABI = Subtarget.isDarwinABI();
- if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
- (!isPPC64 && VT != MVT::i32))
+ bool is64Bit = isPPC64 && VT == LLT::scalar(64);
+ if (!is64Bit && VT != LLT::scalar(32))
report_fatal_error("Invalid register global variable type");
- bool is64Bit = isPPC64 && VT == MVT::i64;
Register Reg = StringSwitch<Register>(RegName)
.Case("r1", is64Bit ? PPC::X1 : PPC::R1)
.Case("r2", (IsDarwinABI || isPPC64) ? Register() : PPC::R2)
@@ -14870,6 +15251,9 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
if (!VT.isSimple())
return false;
+ if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess())
+ return false;
+
if (VT.getSimpleVT().isVector()) {
if (Subtarget.hasVSX()) {
if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
@@ -14889,7 +15273,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return true;
}
-bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const {
VT = VT.getScalarType();
if (!VT.isSimple())
@@ -15278,7 +15663,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
return SDValue();
auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
- switch (this->Subtarget.getDarwinDirective()) {
+ switch (this->Subtarget.getCPUDirective()) {
default:
// TODO: enhance the condition for subtarget before pwr8
return false;
@@ -15288,6 +15673,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
// vector 7 2 2
return true;
case PPC::DIR_PWR9:
+ case PPC::DIR_PWR_FUTURE:
// type mul add shl
// scalar 5 2 2
// vector 7 2 2
@@ -15357,12 +15743,6 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
if (!CI->isTailCall())
return false;
- // If tail calls are disabled for the caller then we are done.
- const Function *Caller = CI->getParent()->getParent();
- auto Attr = Caller->getFnAttribute("disable-tail-calls");
- if (Attr.getValueAsString() == "true")
- return false;
-
// If sibling calls have been disabled and tail-calls aren't guaranteed
// there is no reason to duplicate.
auto &TM = getTargetMachine();
@@ -15375,6 +15755,7 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return false;
// Make sure the callee and caller calling conventions are eligible for tco.
+ const Function *Caller = CI->getParent()->getParent();
if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
CI->getCallingConv()))
return false;
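A hedged example of the calls mayBeEmittedAsTailCall() still screens after the per-function attribute check above was dropped from this hook:

    // Sketch: a sibling call in tail position; when the calling conventions
    // are TCO-eligible it can lower to a plain branch instead of bl+blr.
    long callee(long);
    long caller(long x) { return callee(x + 1); }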
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 62922ea2d4c4..e0c381827b87 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -43,460 +43,475 @@ namespace llvm {
// that come before it. For example, ADD or MUL should be placed before
// the ISD::FIRST_TARGET_MEMORY_OPCODE while a LOAD or STORE should come
// after it.
- enum NodeType : unsigned {
- // Start the numbering where the builtin ops and target ops leave off.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- /// FSEL - Traditional three-operand fsel node.
- ///
- FSEL,
-
- /// FCFID - The FCFID instruction, taking an f64 operand and producing
- /// and f64 value containing the FP representation of the integer that
- /// was temporarily in the f64 operand.
- FCFID,
-
- /// Newer FCFID[US] integer-to-floating-point conversion instructions for
- /// unsigned integers and single-precision outputs.
- FCFIDU, FCFIDS, FCFIDUS,
-
- /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
- /// operand, producing an f64 value containing the integer representation
- /// of that FP value.
- FCTIDZ, FCTIWZ,
-
- /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
- /// unsigned integers with round toward zero.
- FCTIDUZ, FCTIWUZ,
-
- /// Floating-point-to-interger conversion instructions
- FP_TO_UINT_IN_VSR, FP_TO_SINT_IN_VSR,
-
- /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
- /// VSFRC that is sign-extended from ByteWidth to a 64-byte integer.
- VEXTS,
-
- /// SExtVElems, takes an input vector of a smaller type and sign
- /// extends to an output vector of a larger type.
- SExtVElems,
-
- /// Reciprocal estimate instructions (unary FP ops).
- FRE, FRSQRTE,
-
- // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
- // three v4f32 operands and producing a v4f32 result.
- VMADDFP, VNMSUBFP,
-
- /// VPERM - The PPC VPERM Instruction.
- ///
- VPERM,
-
- /// XXSPLT - The PPC VSX splat instructions
- ///
- XXSPLT,
-
- /// VECINSERT - The PPC vector insert instruction
- ///
- VECINSERT,
-
- /// XXREVERSE - The PPC VSX reverse instruction
- ///
- XXREVERSE,
-
- /// VECSHL - The PPC vector shift left instruction
- ///
- VECSHL,
-
- /// XXPERMDI - The PPC XXPERMDI instruction
- ///
- XXPERMDI,
-
- /// The CMPB instruction (takes two operands of i32 or i64).
- CMPB,
-
- /// Hi/Lo - These represent the high and low 16-bit parts of a global
- /// address respectively. These nodes have two operands, the first of
- /// which must be a TargetGlobalAddress, and the second of which must be a
- /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
- /// though these are usually folded into other nodes.
- Hi, Lo,
-
- /// The following two target-specific nodes are used for calls through
- /// function pointers in the 64-bit SVR4 ABI.
-
- /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
- /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
- /// compute an allocation on the stack.
- DYNALLOC,
-
- /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
- /// compute an offset from native SP to the address of the most recent
- /// dynamic alloca.
- DYNAREAOFFSET,
-
- /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
- /// at function entry, used for PIC code.
- GlobalBaseReg,
-
- /// These nodes represent PPC shifts.
- ///
- /// For scalar types, only the last `n + 1` bits of the shift amounts
- /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.
- /// for exact behaviors.
- ///
- /// For vector types, only the last n bits are used. See vsld.
- SRL, SRA, SHL,
-
- /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
- /// word and shift left immediate.
- EXTSWSLI,
-
- /// The combination of sra[wd]i and addze used to implemented signed
- /// integer division by a power of 2. The first operand is the dividend,
- /// and the second is the constant shift amount (representing the
- /// divisor).
- SRA_ADDZE,
-
- /// CALL - A direct function call.
- /// CALL_NOP is a call with the special NOP which follows 64-bit
- /// SVR4 calls and 32-bit/64-bit AIX calls.
- CALL, CALL_NOP,
-
- /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
- /// MTCTR instruction.
- MTCTR,
-
- /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
- /// BCTRL instruction.
- BCTRL,
-
- /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
- /// instruction and the TOC reload required on SVR4 PPC64.
- BCTRL_LOAD_TOC,
-
- /// Return with a flag operand, matched by 'blr'
- RET_FLAG,
-
- /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
- /// This copies the bits corresponding to the specified CRREG into the
- /// resultant GPR. Bits corresponding to other CR regs are undefined.
- MFOCRF,
-
- /// Direct move from a VSX register to a GPR
- MFVSR,
-
- /// Direct move from a GPR to a VSX register (algebraic)
- MTVSRA,
-
- /// Direct move from a GPR to a VSX register (zero)
- MTVSRZ,
-
- /// Direct move of 2 consecutive GPR to a VSX register.
- BUILD_FP128,
-
- /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
- /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
- /// unsupported for this target.
- /// Merge 2 GPRs to a single SPE register.
- BUILD_SPE64,
-
- /// Extract SPE register component, second argument is high or low.
- EXTRACT_SPE,
-
- /// Extract a subvector from signed integer vector and convert to FP.
- /// It is primarily used to convert a (widened) illegal integer vector
- /// type to a legal floating point vector type.
- /// For example v2i32 -> widened to v4i32 -> v2f64
- SINT_VEC_TO_FP,
-
- /// Extract a subvector from unsigned integer vector and convert to FP.
- /// As with SINT_VEC_TO_FP, used for converting illegal types.
- UINT_VEC_TO_FP,
-
- // FIXME: Remove these once the ANDI glue bug is fixed:
- /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
- /// eq or gt bit of CR0 after executing andi. x, 1. This is used to
- /// implement truncation of i32 or i64 to i1.
- ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT,
-
- // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
- // target (returns (Lo, Hi)). It takes a chain operand.
- READ_TIME_BASE,
-
- // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
- EH_SJLJ_SETJMP,
-
- // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
- EH_SJLJ_LONGJMP,
-
- /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
- /// instructions. For lack of better number, we use the opcode number
- /// encoding for the OPC field to identify the compare. For example, 838
- /// is VCMPGTSH.
- VCMP,
-
- /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
- /// altivec VCMP*o instructions. For lack of better number, we use the
- /// opcode number encoding for the OPC field to identify the compare. For
- /// example, 838 is VCMPGTSH.
- VCMPo,
-
- /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
- /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
- /// condition register to branch on, OPC is the branch opcode to use (e.g.
- /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
- /// an optional input flag argument.
- COND_BRANCH,
-
- /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
- /// loops.
- BDNZ, BDZ,
-
- /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
- /// towards zero. Used only as part of the long double-to-int
- /// conversion sequence.
- FADDRTZ,
-
- /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
- MFFS,
-
- /// TC_RETURN - A tail call return.
- /// operand #0 chain
- /// operand #1 callee (register or absolute)
- /// operand #2 stack adjustment
- /// operand #3 optional in flag
- TC_RETURN,
-
- /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
- CR6SET,
- CR6UNSET,
-
- /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
- /// for non-position independent code on PPC32.
- PPC32_GOT,
-
- /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
- /// local dynamic TLS and position indendepent code on PPC32.
- PPC32_PICGOT,
-
- /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
- /// TLS model, produces an ADDIS8 instruction that adds the GOT
- /// base to sym\@got\@tprel\@ha.
- ADDIS_GOT_TPREL_HA,
-
- /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
- /// TLS model, produces a LD instruction with base register G8RReg
- /// and offset sym\@got\@tprel\@l. This completes the addition that
- /// finds the offset of "sym" relative to the thread pointer.
- LD_GOT_TPREL_L,
-
- /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
- /// model, produces an ADD instruction that adds the contents of
- /// G8RReg to the thread pointer. Symbol contains a relocation
- /// sym\@tls which is to be replaced by the thread pointer and
- /// identifies to the linker that the instruction is part of a
- /// TLS sequence.
- ADD_TLS,
-
- /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS
- /// model, produces an ADDIS8 instruction that adds the GOT base
- /// register to sym\@got\@tlsgd\@ha.
- ADDIS_TLSGD_HA,
-
- /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
- /// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by
- /// ADDIS_TLSGD_L_ADDR until after register assignment.
- ADDI_TLSGD_L,
-
- /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS
- /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by
- /// ADDIS_TLSGD_L_ADDR until after register assignment.
- GET_TLS_ADDR,
-
- /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
- /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
- /// register assignment.
- ADDI_TLSGD_L_ADDR,
-
- /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
- /// model, produces an ADDIS8 instruction that adds the GOT base
- /// register to sym\@got\@tlsld\@ha.
- ADDIS_TLSLD_HA,
-
- /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
- /// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by
- /// ADDIS_TLSLD_L_ADDR until after register assignment.
- ADDI_TLSLD_L,
-
- /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS
- /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by
- /// ADDIS_TLSLD_L_ADDR until after register assignment.
- GET_TLSLD_ADDR,
-
- /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
- /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
- /// following register assignment.
- ADDI_TLSLD_L_ADDR,
-
- /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS
- /// model, produces an ADDIS8 instruction that adds X3 to
- /// sym\@dtprel\@ha.
- ADDIS_DTPREL_HA,
-
- /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
- /// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym\@got\@dtprel\@l.
- ADDI_DTPREL_L,
-
- /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
- /// during instruction selection to optimize a BUILD_VECTOR into
- /// operations on splats. This is necessary to avoid losing these
- /// optimizations due to constant folding.
- VADD_SPLAT,
-
- /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned
- /// operand identifies the operating system entry point.
- SC,
-
- /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
- CLRBHRB,
-
- /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch
- /// history rolling buffer entry.
- MFBHRBE,
-
- /// CHAIN = RFEBB CHAIN, State - Return from event-based branch.
- RFEBB,
-
- /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
- /// endian. Maps to an xxswapd instruction that corrects an lxvd2x
- /// or stxvd2x instruction. The chain is necessary because the
- /// sequence replaces a load and needs to provide the same number
- /// of outputs.
- XXSWAPD,
-
- /// An SDNode for swaps that are not associated with any loads/stores
- /// and thereby have no chain.
- SWAP_NO_CHAIN,
-
- /// An SDNode for Power9 vector absolute value difference.
- /// operand #0 vector
- /// operand #1 vector
- /// operand #2 constant i32 0 or 1, to indicate whether needs to patch
- /// the most significant bit for signed i32
- ///
- /// Power9 VABSD* instructions are designed to support unsigned integer
- /// vectors (byte/halfword/word), if we want to make use of them for signed
- /// integer vectors, we have to flip their sign bits first. To flip sign bit
- /// for byte/halfword integer vector would become inefficient, but for word
- /// integer vector, we can leverage XVNEGSP to make it efficiently. eg:
- /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000)
- /// => VABSDUW((XVNEGSP a), (XVNEGSP b))
- VABSD,
-
- /// QVFPERM = This corresponds to the QPX qvfperm instruction.
- QVFPERM,
-
- /// QVGPCI = This corresponds to the QPX qvgpci instruction.
- QVGPCI,
-
- /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
- QVALIGNI,
-
- /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
- QVESPLATI,
-
- /// QBFLT = Access the underlying QPX floating-point boolean
- /// representation.
- QBFLT,
-
- /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
- /// lower (IDX=1) half of v4f32 to v2f64.
- FP_EXTEND_HALF,
-
- /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
- /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
- /// the GPRC input, then stores it through Ptr. Type can be either i16 or
- /// i32.
- STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,
-
- /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
- /// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
- /// then puts it in the bottom bits of the GPRC. TYPE can be either i16
- /// or i32.
- LBRX,
-
- /// STFIWX - The STFIWX instruction. The first operand is an input token
- /// chain, then an f64 value to store, then an address to store it to.
- STFIWX,
-
- /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
- /// load which sign-extends from a 32-bit integer value into the
- /// destination 64-bit register.
- LFIWAX,
-
- /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
- /// load which zero-extends from a 32-bit integer value into the
- /// destination 64-bit register.
- LFIWZX,
-
- /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an
- /// integer smaller than 64 bits into a VSR. The integer is zero-extended.
- /// This can be used for converting loaded integers to floating point.
- LXSIZX,
-
- /// STXSIX - The STXSI[bh]X instruction. The first operand is an input
- /// chain, then an f64 value to store, then an address to store it to,
- /// followed by a byte-width for the store.
- STXSIX,
-
- /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
- /// Maps directly to an lxvd2x instruction that will be followed by
- /// an xxswapd.
- LXVD2X,
-
- /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
- /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
- /// the vector type to load vector in big-endian element order.
- LOAD_VEC_BE,
-
- /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
- /// v2f32 value into the lower half of a VSR register.
- LD_VSX_LH,
-
- /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory
- /// instructions such as LXVDSX, LXVWSX.
- LD_SPLAT,
-
- /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
- /// Maps directly to an stxvd2x instruction that will be preceded by
- /// an xxswapd.
- STXVD2X,
-
- /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
- /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on
- /// the vector type to store vector in big-endian element order.
- STORE_VEC_BE,
-
- /// Store scalar integers from VSR.
- ST_VSR_SCAL_INT,
-
- /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
- /// The 4xf32 load used for v4i1 constants.
- QVLFSb,
-
- /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
- /// except they ensure that the compare input is zero-extended for
- /// sub-word versions because the atomic loads zero-extend.
- ATOMIC_CMP_SWAP_8, ATOMIC_CMP_SWAP_16,
-
- /// GPRC = TOC_ENTRY GA, TOC
- /// Loads the entry for GA from the TOC, where the TOC base is given by
- /// the last operand.
- TOC_ENTRY
- };
+ enum NodeType : unsigned {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// FSEL - Traditional three-operand fsel node.
+ ///
+ FSEL,
+
+ /// XSMAXCDP, XSMINCDP - C-type min/max instructions.
+ XSMAXCDP,
+ XSMINCDP,
+
+ /// FCFID - The FCFID instruction, taking an f64 operand and producing
+ /// an f64 value containing the FP representation of the integer that
+ /// was temporarily in the f64 operand.
+ FCFID,
+
+ /// Newer FCFID[US] integer-to-floating-point conversion instructions for
+ /// unsigned integers and single-precision outputs.
+ FCFIDU,
+ FCFIDS,
+ FCFIDUS,
+
+ /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
+ /// operand, producing an f64 value containing the integer representation
+ /// of that FP value.
+ FCTIDZ,
+ FCTIWZ,
+
+ /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
+ /// unsigned integers with round toward zero.
+ FCTIDUZ,
+ FCTIWUZ,
+
+ /// Floating-point-to-integer conversion instructions
+ FP_TO_UINT_IN_VSR,
+ FP_TO_SINT_IN_VSR,
+
+ /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
+ /// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
+ VEXTS,
+
+ /// SExtVElems, takes an input vector of a smaller type and sign
+ /// extends to an output vector of a larger type.
+ SExtVElems,
+
+ /// Reciprocal estimate instructions (unary FP ops).
+ FRE,
+ FRSQRTE,
+
+ // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
+ // three v4f32 operands and producing a v4f32 result.
+ VMADDFP,
+ VNMSUBFP,
+
+ /// VPERM - The PPC VPERM Instruction.
+ ///
+ VPERM,
+
+ /// XXSPLT - The PPC VSX splat instructions
+ ///
+ XXSPLT,
+
+ /// VECINSERT - The PPC vector insert instruction
+ ///
+ VECINSERT,
+
+ /// VECSHL - The PPC vector shift left instruction
+ ///
+ VECSHL,
+
+ /// XXPERMDI - The PPC XXPERMDI instruction
+ ///
+ XXPERMDI,
+
+ /// The CMPB instruction (takes two operands of i32 or i64).
+ CMPB,
+
+ /// Hi/Lo - These represent the high and low 16-bit parts of a global
+ /// address respectively. These nodes have two operands, the first of
+ /// which must be a TargetGlobalAddress, and the second of which must be a
+ /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
+ /// though these are usually folded into other nodes.
+ Hi,
+ Lo,
+
+ /// The following two target-specific nodes are used for calls through
+ /// function pointers in the 64-bit SVR4 ABI.
+
+ /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
+ /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+ /// compute an allocation on the stack.
+ DYNALLOC,
+
+ /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+ /// compute an offset from native SP to the address of the most recent
+ /// dynamic alloca.
+ DYNAREAOFFSET,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// These nodes represent PPC shifts.
+ ///
+ /// For scalar types, only the last `n + 1` bits of the shift amounts
+ /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.
+ /// for exact behaviors.
+ ///
+ /// For vector types, only the last n bits are used. See vsld.
+ SRL,
+ SRA,
+ SHL,
+
+ /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
+ /// word and shift left immediate.
+ EXTSWSLI,
+
+ /// The combination of sra[wd]i and addze used to implement signed
+ /// integer division by a power of 2. The first operand is the dividend,
+ /// and the second is the constant shift amount (representing the
+ /// divisor).
+ SRA_ADDZE,
+
+ /// CALL - A direct function call.
+ /// CALL_NOP is a call with the special NOP which follows 64-bit
+ /// SVR4 calls and 32-bit/64-bit AIX calls.
+ CALL,
+ CALL_NOP,
+
+ /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
+ /// MTCTR instruction.
+ MTCTR,
+
+ /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
+ /// BCTRL instruction.
+ BCTRL,
+
+ /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
+ /// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX
+ /// and 64-bit AIX.
+ BCTRL_LOAD_TOC,
+
+ /// Return with a flag operand, matched by 'blr'
+ RET_FLAG,
+
+ /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
+ /// This copies the bits corresponding to the specified CRREG into the
+ /// resultant GPR. Bits corresponding to other CR regs are undefined.
+ MFOCRF,
+
+ /// Direct move from a VSX register to a GPR
+ MFVSR,
+
+ /// Direct move from a GPR to a VSX register (algebraic)
+ MTVSRA,
+
+ /// Direct move from a GPR to a VSX register (zero)
+ MTVSRZ,
+
+ /// Direct move of 2 consecutive GPRs to a VSX register.
+ BUILD_FP128,
+
+ /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
+ /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
+ /// unsupported for this target.
+ /// Merge 2 GPRs to a single SPE register.
+ BUILD_SPE64,
+
+ /// Extract SPE register component, second argument is high or low.
+ EXTRACT_SPE,
+
+ /// Extract a subvector from signed integer vector and convert to FP.
+ /// It is primarily used to convert a (widened) illegal integer vector
+ /// type to a legal floating point vector type.
+ /// For example v2i32 -> widened to v4i32 -> v2f64
+ SINT_VEC_TO_FP,
+
+ /// Extract a subvector from unsigned integer vector and convert to FP.
+ /// As with SINT_VEC_TO_FP, used for converting illegal types.
+ UINT_VEC_TO_FP,
+
+ // FIXME: Remove these once the ANDI glue bug is fixed:
+ /// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
+ /// eq or gt bit of CR0 after executing andi. x, 1. This is used to
+ /// implement truncation of i32 or i64 to i1.
+ ANDI_rec_1_EQ_BIT,
+ ANDI_rec_1_GT_BIT,
+
+ // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
+ // target (returns (Lo, Hi)). It takes a chain operand.
+ READ_TIME_BASE,
+
+ // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ EH_SJLJ_SETJMP,
+
+ // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ EH_SJLJ_LONGJMP,
+
+ /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
+ /// instructions. For lack of a better number, we use the opcode number
+ /// encoding for the OPC field to identify the compare. For example, 838
+ /// is VCMPGTSH.
+ VCMP,
+
+ /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
+ /// altivec VCMP*o instructions. For lack of a better number, we use the
+ /// opcode number encoding for the OPC field to identify the compare. For
+ /// example, 838 is VCMPGTSH.
+ VCMPo,
+
+ /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
+ /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
+ /// condition register to branch on, OPC is the branch opcode to use (e.g.
+ /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
+ /// an optional input flag argument.
+ COND_BRANCH,
+
+ /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
+ /// loops.
+ BDNZ,
+ BDZ,
+
+ /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
+ /// towards zero. Used only as part of the long double-to-int
+ /// conversion sequence.
+ FADDRTZ,
+
+ /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
+ MFFS,
+
+ /// TC_RETURN - A tail call return.
+ /// operand #0 chain
+ /// operand #1 callee (register or absolute)
+ /// operand #2 stack adjustment
+ /// operand #3 optional in flag
+ TC_RETURN,
+
+ /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
+ CR6SET,
+ CR6UNSET,
+
+ /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
+ /// for non-position independent code on PPC32.
+ PPC32_GOT,
+
+ /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
+ /// local dynamic TLS and position independent code on PPC32.
+ PPC32_PICGOT,
+
+ /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
+ /// TLS model, produces an ADDIS8 instruction that adds the GOT
+ /// base to sym\@got\@tprel\@ha.
+ ADDIS_GOT_TPREL_HA,
+
+ /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
+ /// TLS model, produces a LD instruction with base register G8RReg
+ /// and offset sym\@got\@tprel\@l. This completes the addition that
+ /// finds the offset of "sym" relative to the thread pointer.
+ LD_GOT_TPREL_L,
+
+ /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
+ /// model, produces an ADD instruction that adds the contents of
+ /// G8RReg to the thread pointer. Symbol contains a relocation
+ /// sym\@tls which is to be replaced by the thread pointer and
+ /// identifies to the linker that the instruction is part of a
+ /// TLS sequence.
+ ADD_TLS,
+
+ /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym\@got\@tlsgd\@ha.
+ ADDIS_TLSGD_HA,
+
+ /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by
+ /// ADDIS_TLSGD_L_ADDR until after register assignment.
+ ADDI_TLSGD_L,
+
+ /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by
+ /// ADDIS_TLSGD_L_ADDR until after register assignment.
+ GET_TLS_ADDR,
+
+ /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
+ /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
+ /// register assignment.
+ ADDI_TLSGD_L_ADDR,
+
+ /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym\@got\@tlsld\@ha.
+ ADDIS_TLSLD_HA,
+
+ /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by
+ /// ADDIS_TLSLD_L_ADDR until after register assignment.
+ ADDI_TLSLD_L,
+
+ /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by
+ /// ADDIS_TLSLD_L_ADDR until after register assignment.
+ GET_TLSLD_ADDR,
+
+ /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
+ /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
+ /// following register assignment.
+ ADDI_TLSLD_L_ADDR,
+
+ /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds X3 to
+ /// sym\@dtprel\@ha.
+ ADDIS_DTPREL_HA,
+
+ /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym\@got\@dtprel\@l.
+ ADDI_DTPREL_L,
+
+ /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
+ /// during instruction selection to optimize a BUILD_VECTOR into
+ /// operations on splats. This is necessary to avoid losing these
+ /// optimizations due to constant folding.
+ VADD_SPLAT,
+
+ /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned
+ /// operand identifies the operating system entry point.
+ SC,
+
+ /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
+ CLRBHRB,
+
+ /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch
+ /// history rolling buffer entry.
+ MFBHRBE,
+
+ /// CHAIN = RFEBB CHAIN, State - Return from event-based branch.
+ RFEBB,
+
+ /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
+ /// endian. Maps to an xxswapd instruction that corrects an lxvd2x
+ /// or stxvd2x instruction. The chain is necessary because the
+ /// sequence replaces a load and needs to provide the same number
+ /// of outputs.
+ XXSWAPD,
+
+ /// An SDNode for swaps that are not associated with any loads/stores
+ /// and thereby have no chain.
+ SWAP_NO_CHAIN,
+
+ /// An SDNode for Power9 vector absolute value difference.
+ /// operand #0 vector
+ /// operand #1 vector
+ /// operand #2 constant i32 0 or 1, to indicate whether it needs to patch
+ /// the most significant bit for signed i32
+ ///
+ /// Power9 VABSD* instructions are designed for unsigned integer
+ /// vectors (byte/halfword/word); to use them for signed integer
+ /// vectors, we have to flip their sign bits first. Flipping the sign bit
+ /// of a byte/halfword integer vector would be inefficient, but for a word
+ /// integer vector we can leverage XVNEGSP to do it efficiently, e.g.:
+ /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000)
+ /// => VABSDUW((XVNEGSP a), (XVNEGSP b))
+ VABSD,
+
+ /// QVFPERM = This corresponds to the QPX qvfperm instruction.
+ QVFPERM,
+
+ /// QVGPCI = This corresponds to the QPX qvgpci instruction.
+ QVGPCI,
+
+ /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
+ QVALIGNI,
+
+ /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
+ QVESPLATI,
+
+ /// QBFLT = Access the underlying QPX floating-point boolean
+ /// representation.
+ QBFLT,
+
+ /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
+ /// lower (IDX=1) half of v4f32 to v2f64.
+ FP_EXTEND_HALF,
+
+ /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
+ /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
+ /// the GPRC input, then stores it through Ptr. Type can be either i16 or
+ /// i32.
+ STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,
+
+ /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
+ /// byte-swapping load instruction. It loads "Type" bits, byte-swaps the
+ /// value, then puts it in the bottom bits of the GPRC. TYPE can be either i16
+ /// or i32.
+ LBRX,
+
+ /// STFIWX - The STFIWX instruction. The first operand is an input token
+ /// chain, then an f64 value to store, then an address to store it to.
+ STFIWX,
+
+ /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
+ /// load which sign-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWAX,
+
+ /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
+ /// load which zero-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWZX,
+
+ /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an
+ /// integer smaller than 64 bits into a VSR. The integer is zero-extended.
+ /// This can be used for converting loaded integers to floating point.
+ LXSIZX,
+
+ /// STXSIX - The STXSI[bh]X instruction. The first operand is an input
+ /// chain, then an f64 value to store, then an address to store it to,
+ /// followed by a byte-width for the store.
+ STXSIX,
+
+ /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
+ /// Maps directly to an lxvd2x instruction that will be followed by
+ /// an xxswapd.
+ LXVD2X,
+
+ /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
+ /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x, depending on
+ /// the vector type, to load the vector in big-endian element order.
+ LOAD_VEC_BE,
+
+ /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
+ /// v2f32 value into the lower half of a VSR register.
+ LD_VSX_LH,
+
+ /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory
+ /// instruction, such as LXVDSX or LXVWSX.
+ LD_SPLAT,
+
+ /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
+ /// Maps directly to an stxvd2x instruction that will be preceded by
+ /// an xxswapd.
+ STXVD2X,
+
+ /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
+ /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x, depending on
+ /// the vector type, to store the vector in big-endian element order.
+ STORE_VEC_BE,
+
+ /// Store scalar integers from VSR.
+ ST_VSR_SCAL_INT,
+
+ /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
+ /// The 4xf32 load used for v4i1 constants.
+ QVLFSb,
+
+ /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
+ /// except they ensure that the compare input is zero-extended for
+ /// sub-word versions because the atomic loads zero-extend.
+ ATOMIC_CMP_SWAP_8,
+ ATOMIC_CMP_SWAP_16,
+
+ /// GPRC = TOC_ENTRY GA, TOC
+ /// Loads the entry for GA from the TOC, where the TOC base is given by
+ /// the last operand.
+ TOC_ENTRY
+ };
} // end namespace PPCISD
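Among the enumerators added above, XSMAXCDP/XSMINCDP are worth a note: "C-type" min/max follows the C select idiom rather than IEEE fmin/fmax NaN semantics, so a compare-plus-select like the hedged sketch below can lower to a single instruction.

    // Sketch (not from the patch): a candidate for xsmincdp selection.
    double cmin(double a, double b) { return a < b ? a : b; }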
@@ -647,6 +662,10 @@ namespace llvm {
return true;
}
+ bool isEqualityCmpFoldedWithSignedCmp() const override {
+ return false;
+ }
+
bool hasAndNotCompare(SDValue) const override {
return true;
}
@@ -733,7 +752,7 @@ namespace llvm {
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
- Register getRegisterByName(const char* RegName, EVT VT,
+ Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
void computeKnownBitsForTargetNode(const SDValue Op,
@@ -900,7 +919,8 @@ namespace llvm {
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
/// expanded to FMAs when this method returns true, otherwise fmuladd is
/// expanded to fmul + fadd.
- bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const override;
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
@@ -1113,6 +1133,10 @@ namespace llvm {
SelectionDAG &DAG, SDValue ArgVal,
const SDLoc &dl) const;
+ SDValue LowerFormalArguments_AIX(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerFormalArguments_Darwin(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index f16187149d36..43431a1e0069 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -71,13 +71,14 @@ def SRL64 : SDNodeXForm<imm, [{
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
- let isReturn = 1, Uses = [LR8, RM] in
+ let isReturn = 1, isPredicable = 1, Uses = [LR8, RM] in
def BLR8 : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB,
[(retflag)]>, Requires<[In64BitMode]>;
let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in {
- def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
- []>,
- Requires<[In64BitMode]>;
+ let isPredicable = 1 in
+ def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>,
+ Requires<[In64BitMode]>;
def BCCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
"b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB,
[]>,
@@ -141,9 +142,10 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
[(PPCcall_nop (i64 imm:$func))]>;
}
let Uses = [CTR8, RM] in {
- def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
- "bctrl", IIC_BrB, [(PPCbctrl)]>,
- Requires<[In64BitMode]>;
+ let isPredicable = 1 in
+ def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", IIC_BrB, [(PPCbctrl)]>,
+ Requires<[In64BitMode]>;
let isCodeGenOnly = 1 in {
def BCCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
@@ -253,7 +255,7 @@ def LDARX : XForm_1_memOp<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
// Instruction to support lock versions of atomics
// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
def LDARXL : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
- "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isDOT;
+ "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isRecordForm;
let hasExtraDefRegAllocReq = 1 in
def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$rD), (ins g8rc:$rA, u5imm:$FC),
@@ -263,7 +265,7 @@ def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$rD), (ins g8rc:$rA, u5imm:$FC),
let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STDCX : XForm_1_memOp<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
- "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isDOT;
+ "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isRecordForm;
let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC),
@@ -410,6 +412,7 @@ def DYNALLOC8 : PPCEmitTimePseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri
def DYNAREAOFFSET8 : PPCEmitTimePseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8",
[(set i64:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
+let hasSideEffects = 0 in {
let Defs = [LR8] in {
def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS),
"mtlr $rS", IIC_SprMTSPR>,
@@ -421,6 +424,7 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins),
PPC970_DGroup_First, PPC970_Unit_FXU;
}
} // Interpretation64Bit
+}
//===----------------------------------------------------------------------===//
// Fixed point instructions.
@@ -474,14 +478,14 @@ defm XOR8 : XForm_6r<31, 316, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
// Logical ops with immediate.
let Defs = [CR0] in {
-def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+def ANDI8_rec : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
"andi. $dst, $src1, $src2", IIC_IntGeneral,
[(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>,
- isDOT;
-def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ isRecordForm;
+def ANDIS8_rec : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
"andis. $dst, $src1, $src2", IIC_IntGeneral,
[(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>,
- isDOT;
+ isRecordForm;
}
def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
"ori $dst, $src1, $src2", IIC_IntSimple,
@@ -497,9 +501,9 @@ def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
[(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>;
let isCommutable = 1 in
-defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "add", "$rT, $rA, $rB", IIC_IntSimple,
- [(set i64:$rT, (add i64:$rA, i64:$rB))]>;
+defm ADD8 : XOForm_1rx<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "add", "$rT, $rA, $rB", IIC_IntSimple,
+ [(set i64:$rT, (add i64:$rA, i64:$rB))]>;
// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
// initial-exec thread-local storage model. We need to forbid r0 here -
// while it works for add just fine, the linker can relax this to local-exec
@@ -576,9 +580,9 @@ defm SUBFC8 : XOForm_1rc<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"subfc", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
PPC970_DGroup_Cracked;
-defm SUBF8 : XOForm_1r<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "subf", "$rT, $rA, $rB", IIC_IntGeneral,
- [(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
+defm SUBF8 : XOForm_1rx<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "subf", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA),
"neg", "$rT, $rA", IIC_IntSimple,
[(set i64:$rT, (ineg i64:$rA))]>;
@@ -777,10 +781,10 @@ defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
defm DIVDU : XOForm_1rcr<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"divdu", "$rT, $rA, $rB", IIC_IntDivD,
[(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64;
-def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divde $rT, $rA, $rB", IIC_IntDivD,
- [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>,
- isPPC64, Requires<[HasExtDiv]>;
+defm DIVDE : XOForm_1rcr<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divde", "$rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>,
+ isPPC64, Requires<[HasExtDiv]>;
let Predicates = [IsISA3_0] in {
def MADDHD : VAForm_1a<48, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
@@ -815,24 +819,14 @@ def MODUD : XForm_8<31, 265, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
[(set i64:$rT, (urem i64:$rA, i64:$rB))]>;
}
-let Defs = [CR0] in
-def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divde. $rT, $rA, $rB", IIC_IntDivD,
- []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
- isPPC64, Requires<[HasExtDiv]>;
-def DIVDEU : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divdeu $rT, $rA, $rB", IIC_IntDivD,
- [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>,
- isPPC64, Requires<[HasExtDiv]>;
-let Defs = [CR0] in
-def DIVDEUo : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divdeu. $rT, $rA, $rB", IIC_IntDivD,
- []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
- isPPC64, Requires<[HasExtDiv]>;
+defm DIVDEU : XOForm_1rcr<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdeu", "$rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>,
+ isPPC64, Requires<[HasExtDiv]>;
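A hedged usage sketch for the extended-divide records converted to XOForm_1rcr above, assuming clang's __builtin_divde name for int_ppc_divde (requires the extdiv feature):

    // Sketch: extended divide computes (dividend << 64) / divisor.
    long long extendedDivide(long long a, long long b) {
      return __builtin_divde(a, b); // selects divde
    }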
let isCommutable = 1 in
-defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "mulld", "$rT, $rA, $rB", IIC_IntMulHD,
- [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
+defm MULLD : XOForm_1rx<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "mulld", "$rT, $rA, $rB", IIC_IntMulHD,
+ [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def MULLI8 : DForm_2<7, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
"mulli $rD, $rA, $imm", IIC_IntMulLI,
@@ -943,7 +937,7 @@ def LWAX : XForm_1_memOp<31, 341, (outs g8rc:$rD), (ins memrr:$src),
[(set i64:$rD, (sextloadi32 xaddrX4:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
// For fast-isel:
-let isCodeGenOnly = 1, mayLoad = 1 in {
+let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in {
def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src),
"lwa $rD, $src", IIC_LdStLWA, []>, isPPC64,
PPC970_DGroup_Cracked;
@@ -1469,7 +1463,7 @@ class X_L1_RA5_RB5<bits<6> opcode, bits<10> xo, string opc, RegisterOperand ty,
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def CP_COPY8 : X_L1_RA5_RB5<31, 774, "copy" , g8rc, IIC_LdStCOPY, []>;
def CP_PASTE8 : X_L1_RA5_RB5<31, 902, "paste" , g8rc, IIC_LdStPASTE, []>;
-def CP_PASTE8o : X_L1_RA5_RB5<31, 902, "paste.", g8rc, IIC_LdStPASTE, []>,isDOT;
+def CP_PASTE8_rec : X_L1_RA5_RB5<31, 902, "paste.", g8rc, IIC_LdStPASTE, []>,isRecordForm;
}
// SLB Invalidate Entry Global
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index fd3fc2af2327..f94816a35f79 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -261,6 +261,11 @@ def vecspltisw : PatLeaf<(build_vector), [{
return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != nullptr;
}], VSPLTISW_get_imm>;
+def immEQOneV : PatLeaf<(build_vector), [{
+ if (ConstantSDNode *C = cast<BuildVectorSDNode>(N)->getConstantSplatNode())
+ return C->isOne();
+ return false;
+}]>;
//===----------------------------------------------------------------------===//
// Helpers for defining instructions that directly correspond to intrinsics.
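A standalone scalar model (hypothetical, not LLVM code) of the predicate immEQOneV encodes: accept only a constant splat whose every lane equals one. It serves as the shift amount in the vector-average patterns later in this file, sketched after that hunk.

    #include <array>

    // True iff the 4-lane vector is a constant splat of 1.
    static bool isSplatOfOne(const std::array<int, 4> &V) {
      for (int Lane : V)
        if (Lane != 1)
          return false;
      return true;
    }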
@@ -518,19 +523,19 @@ def VANDC : VXForm_1<1092, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
def VCFSX : VXForm_1<842, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vcfsx $vD, $vB, $UIMM", IIC_VecFP,
[(set v4f32:$vD,
- (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>;
+ (int_ppc_altivec_vcfsx v4i32:$vB, timm:$UIMM))]>;
def VCFUX : VXForm_1<778, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vcfux $vD, $vB, $UIMM", IIC_VecFP,
[(set v4f32:$vD,
- (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>;
+ (int_ppc_altivec_vcfux v4i32:$vB, timm:$UIMM))]>;
def VCTSXS : VXForm_1<970, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vctsxs $vD, $vB, $UIMM", IIC_VecFP,
[(set v4i32:$vD,
- (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>;
+ (int_ppc_altivec_vctsxs v4f32:$vB, timm:$UIMM))]>;
def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vctuxs $vD, $vB, $UIMM", IIC_VecFP,
[(set v4i32:$vD,
- (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>;
+ (int_ppc_altivec_vctuxs v4f32:$vB, timm:$UIMM))]>;
// Defines with the UIM field set to 0 for floating-point
// to integer (fp_to_sint/fp_to_uint) conversions and integer
@@ -706,7 +711,7 @@ def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vspltw $vD, $vB, $UIMM", IIC_VecPerm,
[(set v16i8:$vD,
(vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def VSPLTBs : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB),
"vspltb $vD, $vB, $UIMM", IIC_VecPerm, []>;
def VSPLTHs : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB),
@@ -789,37 +794,37 @@ class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
// f32 element comparisons.
def VCMPBFP : VCMP <966, "vcmpbfp $vD, $vA, $vB" , v4f32>;
-def VCMPBFPo : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
+def VCMPBFP_rec : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
def VCMPEQFP : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>;
-def VCMPEQFPo : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
+def VCMPEQFP_rec : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
def VCMPGEFP : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>;
-def VCMPGEFPo : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
+def VCMPGEFP_rec : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
def VCMPGTFP : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>;
-def VCMPGTFPo : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
+def VCMPGTFP_rec : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
// i8 element comparisons.
def VCMPEQUB : VCMP < 6, "vcmpequb $vD, $vA, $vB" , v16i8>;
-def VCMPEQUBo : VCMPo< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
+def VCMPEQUB_rec : VCMPo< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
def VCMPGTSB : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>;
-def VCMPGTSBo : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
+def VCMPGTSB_rec : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
def VCMPGTUB : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>;
-def VCMPGTUBo : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
+def VCMPGTUB_rec : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
// i16 element comparisons.
def VCMPEQUH : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>;
-def VCMPEQUHo : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
+def VCMPEQUH_rec : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
def VCMPGTSH : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>;
-def VCMPGTSHo : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
+def VCMPGTSH_rec : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
def VCMPGTUH : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>;
-def VCMPGTUHo : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
+def VCMPGTUH_rec : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
// i32 element comparisons.
def VCMPEQUW : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>;
-def VCMPEQUWo : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
+def VCMPEQUW_rec : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
-def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
+def VCMPGTSW_rec : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
-def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+def VCMPGTUW_rec : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
isReMaterializable = 1 in {
@@ -856,6 +861,14 @@ def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
def : InstAlias<"vmr $vD, $vA", (VOR vrrc:$vD, vrrc:$vA, vrrc:$vA)>;
def : InstAlias<"vnot $vD, $vA", (VNOR vrrc:$vD, vrrc:$vA, vrrc:$vA)>;
+// Rotates.
+def : Pat<(v16i8 (rotl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VRLB v16i8:$vA, v16i8:$vB))>;
+def : Pat<(v8i16 (rotl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VRLH v8i16:$vA, v8i16:$vB))>;
+def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VRLW v4i32:$vA, v4i32:$vB))>;
+
// Loads.
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
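A hedged sketch of source that reaches the new rotl patterns: the standard rotate idiom, which (once vectorized to v4i32) matches the pattern above and selects a single vrlw.

    // Standard rotate-left idiom, safe for N == 0 (not from the patch).
    unsigned rotl32(unsigned X, unsigned N) {
      return (X << (N & 31)) | (X >> ((32 - N) & 31));
    }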
@@ -1092,6 +1105,20 @@ def : Pat<(v4f32 (vselect v4i32:$vA, v4f32:$vB, v4f32:$vC)),
def : Pat<(v2f64 (vselect v2i64:$vA, v2f64:$vB, v2f64:$vC)),
(VSEL $vC, $vB, $vA)>;
+// Vector Integer Average Instructions
+def : Pat<(v4i32 (sra (sub v4i32:$vA, (vnot_ppc v4i32:$vB)),
+ (v4i32 (immEQOneV)))), (v4i32 (VAVGSW $vA, $vB))>;
+def : Pat<(v8i16 (sra (sub v8i16:$vA, (v8i16 (bitconvert(vnot_ppc v4i32:$vB)))),
+ (v8i16 (immEQOneV)))), (v8i16 (VAVGSH $vA, $vB))>;
+def : Pat<(v16i8 (sra (sub v16i8:$vA, (v16i8 (bitconvert(vnot_ppc v4i32:$vB)))),
+ (v16i8 (immEQOneV)))), (v16i8 (VAVGSB $vA, $vB))>;
+def : Pat<(v4i32 (srl (sub v4i32:$vA, (vnot_ppc v4i32:$vB)),
+ (v4i32 (immEQOneV)))), (v4i32 (VAVGUW $vA, $vB))>;
+def : Pat<(v8i16 (srl (sub v8i16:$vA, (v8i16 (bitconvert(vnot_ppc v4i32:$vB)))),
+ (v8i16 (immEQOneV)))), (v8i16 (VAVGUH $vA, $vB))>;
+def : Pat<(v16i8 (srl (sub v16i8:$vA, (v16i8 (bitconvert(vnot_ppc v4i32:$vB)))),
+ (v16i8 (immEQOneV)))), (v16i8 (VAVGUB $vA, $vB))>;
+
} // end HasAltivec
def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
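The average patterns above lean on a two's-complement identity worth spelling out: vnot(b) == -b - 1, so sub(a, vnot(b)) == a + b + 1, and the shift right by the immEQOneV splat of one completes the rounding average (a + b + 1) >> 1 that VAVGS*/VAVGU* compute per lane. A hedged scalar model of one signed word lane:

    // Scalar model of one VAVGSW lane (sketch, not LLVM code).
    int avgRoundedSigned(int A, int B) {
      long long Wide = (long long)A + B + 1; // 33-bit-safe intermediate
      return (int)(Wide >> 1);               // sra(sub(a, vnot b), 1)
    }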
@@ -1140,9 +1167,13 @@ def:Pat<(vmrgew_swapped_shuffle v16i8:$vA, v16i8:$vB),
def:Pat<(vmrgow_swapped_shuffle v16i8:$vA, v16i8:$vB),
(VMRGOW $vB, $vA)>;
+// Vector rotates.
+def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
+
+def : Pat<(v2i64 (rotl v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VRLD v2i64:$vA, v2i64:$vB))>;
// Vector shifts
-def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vsld $vD, $vA, $vB", IIC_VecGeneral, []>;
def VSRD : VXForm_1<1732, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
@@ -1245,11 +1276,11 @@ def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
// i64 element comparisons.
def VCMPEQUD : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>;
-def VCMPEQUDo : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
+def VCMPEQUD_rec : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
def VCMPGTSD : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>;
-def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
+def VCMPGTSD_rec : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>;
-def VCMPGTUDo : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
+def VCMPGTUD_rec : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
// The cryptography instructions that do not require Category:Vector.Crypto
def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb",
@@ -1313,21 +1344,21 @@ let Predicates = [HasP9Altivec] in {
// i8 element comparisons.
def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>;
-def VCMPNEBo : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
+def VCMPNEB_rec : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
def VCMPNEZB : VCMP <263, "vcmpnezb $vD, $vA, $vB" , v16i8>;
-def VCMPNEZBo : VCMPo<263, "vcmpnezb. $vD, $vA, $vB", v16i8>;
+def VCMPNEZB_rec : VCMPo<263, "vcmpnezb. $vD, $vA, $vB", v16i8>;
// i16 element comparisons.
def VCMPNEH : VCMP < 71, "vcmpneh $vD, $vA, $vB" , v8i16>;
-def VCMPNEHo : VCMPo< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>;
+def VCMPNEH_rec : VCMPo< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>;
def VCMPNEZH : VCMP <327, "vcmpnezh $vD, $vA, $vB" , v8i16>;
-def VCMPNEZHo : VCMPo<327, "vcmpnezh. $vD, $vA, $vB", v8i16>;
+def VCMPNEZH_rec : VCMPo<327, "vcmpnezh. $vD, $vA, $vB", v8i16>;
// i32 element comparisons.
def VCMPNEW : VCMP <135, "vcmpnew $vD, $vA, $vB" , v4i32>;
-def VCMPNEWo : VCMPo<135, "vcmpnew. $vD, $vA, $vB" , v4i32>;
+def VCMPNEW_rec : VCMPo<135, "vcmpnew. $vD, $vA, $vB" , v4i32>;
def VCMPNEZW : VCMP <391, "vcmpnezw $vD, $vA, $vB" , v4i32>;
-def VCMPNEZWo : VCMPo<391, "vcmpnezw. $vD, $vA, $vB", v4i32>;
+def VCMPNEZW_rec : VCMPo<391, "vcmpnezw. $vD, $vA, $vB", v4i32>;
// VX-Form: [PO VRT / UIM VRB XO].
// We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent
@@ -1347,12 +1378,14 @@ def VEXTRACTUW : VX1_VT5_UIM5_VB5<653, "vextractuw", []>;
def VEXTRACTD : VX1_VT5_UIM5_VB5<717, "vextractd" , []>;
// Vector Extract Unsigned Byte/Halfword/Word Left/Right-Indexed
+let hasSideEffects = 0 in {
def VEXTUBLX : VX1_RT5_RA5_VB5<1549, "vextublx", []>;
def VEXTUBRX : VX1_RT5_RA5_VB5<1805, "vextubrx", []>;
def VEXTUHLX : VX1_RT5_RA5_VB5<1613, "vextuhlx", []>;
def VEXTUHRX : VX1_RT5_RA5_VB5<1869, "vextuhrx", []>;
def VEXTUWLX : VX1_RT5_RA5_VB5<1677, "vextuwlx", []>;
def VEXTUWRX : VX1_RT5_RA5_VB5<1933, "vextuwrx", []>;
+}
// Vector Insert Element Instructions
def VINSERTB : VXForm_1<781, (outs vrrc:$vD),
@@ -1410,6 +1443,12 @@ let isCodeGenOnly = 1 in {
def VEXTSW2Ds : VX_VT5_EO5_VB5s<1538, 26, "vextsw2d", []>;
}
+def : Pat<(v4i32 (sext_inreg v4i32:$VRB, v4i8)), (v4i32 (VEXTSB2W $VRB))>;
+def : Pat<(v4i32 (sext_inreg v4i32:$VRB, v4i16)), (v4i32 (VEXTSH2W $VRB))>;
+def : Pat<(v2i64 (sext_inreg v2i64:$VRB, v2i8)), (v2i64 (VEXTSB2D $VRB))>;
+def : Pat<(v2i64 (sext_inreg v2i64:$VRB, v2i16)), (v2i64 (VEXTSH2D $VRB))>;
+def : Pat<(v2i64 (sext_inreg v2i64:$VRB, v2i32)), (v2i64 (VEXTSW2D $VRB))>;
+
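A hedged sketch of what lowers to these sext_inreg selections: per-lane sign extension of narrow values already held in wider vector lanes, e.g. the i8-in-i32 case that selects vextsb2w.

    // Scalar model of one v4i32 lane for sext_inreg i8 -> i32 (sketch).
    int signExtendByteInWord(int Lane) {
      return (int)(signed char)(Lane & 0xff);
    }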
// Vector Integer Negate
def VNEGW : VX_VT5_EO5_VB5<1538, 6, "vnegw",
[(set v4i32:$vD,
@@ -1496,18 +1535,18 @@ class VX_VT5_EO5_VB5_XO9_o<bits<5> eo, bits<9> xo, string opc,
}
// Decimal Convert From/to National/Zoned/Signed-QWord
-def BCDCFNo : VX_VT5_EO5_VB5_PS1_XO9_o<7, 385, "bcdcfn." , []>;
-def BCDCFZo : VX_VT5_EO5_VB5_PS1_XO9_o<6, 385, "bcdcfz." , []>;
-def BCDCTNo : VX_VT5_EO5_VB5_XO9_o <5, 385, "bcdctn." , []>;
-def BCDCTZo : VX_VT5_EO5_VB5_PS1_XO9_o<4, 385, "bcdctz." , []>;
-def BCDCFSQo : VX_VT5_EO5_VB5_PS1_XO9_o<2, 385, "bcdcfsq.", []>;
-def BCDCTSQo : VX_VT5_EO5_VB5_XO9_o <0, 385, "bcdctsq.", []>;
+def BCDCFN_rec : VX_VT5_EO5_VB5_PS1_XO9_o<7, 385, "bcdcfn." , []>;
+def BCDCFZ_rec : VX_VT5_EO5_VB5_PS1_XO9_o<6, 385, "bcdcfz." , []>;
+def BCDCTN_rec : VX_VT5_EO5_VB5_XO9_o <5, 385, "bcdctn." , []>;
+def BCDCTZ_rec : VX_VT5_EO5_VB5_PS1_XO9_o<4, 385, "bcdctz." , []>;
+def BCDCFSQ_rec : VX_VT5_EO5_VB5_PS1_XO9_o<2, 385, "bcdcfsq.", []>;
+def BCDCTSQ_rec : VX_VT5_EO5_VB5_XO9_o <0, 385, "bcdctsq.", []>;
// Decimal Copy-Sign/Set-Sign
let Defs = [CR6] in
-def BCDCPSGNo : VX1_VT5_VA5_VB5<833, "bcdcpsgn.", []>;
+def BCDCPSGN_rec : VX1_VT5_VA5_VB5<833, "bcdcpsgn.", []>;
-def BCDSETSGNo : VX_VT5_EO5_VB5_PS1_XO9_o<31, 385, "bcdsetsgn.", []>;
+def BCDSETSGN_rec : VX_VT5_EO5_VB5_PS1_XO9_o<31, 385, "bcdsetsgn.", []>;
// [PO VRT VRA VRB 1 PS XO], "_o" means CR6 is set.
class VX_VT5_VA5_VB5_PS1_XO9_o<bits<9> xo, string opc, list<dag> pattern>
@@ -1526,13 +1565,13 @@ class VX_VT5_VA5_VB5_XO9_o<bits<9> xo, string opc, list<dag> pattern>
}
// Decimal Shift/Unsigned-Shift/Shift-and-Round
-def BCDSo : VX_VT5_VA5_VB5_PS1_XO9_o<193, "bcds." , []>;
-def BCDUSo : VX_VT5_VA5_VB5_XO9_o <129, "bcdus.", []>;
-def BCDSRo : VX_VT5_VA5_VB5_PS1_XO9_o<449, "bcdsr.", []>;
+def BCDS_rec : VX_VT5_VA5_VB5_PS1_XO9_o<193, "bcds." , []>;
+def BCDUS_rec : VX_VT5_VA5_VB5_XO9_o <129, "bcdus.", []>;
+def BCDSR_rec : VX_VT5_VA5_VB5_PS1_XO9_o<449, "bcdsr.", []>;
// Decimal (Unsigned) Truncate
-def BCDTRUNCo : VX_VT5_VA5_VB5_PS1_XO9_o<257, "bcdtrunc." , []>;
-def BCDUTRUNCo : VX_VT5_VA5_VB5_XO9_o <321, "bcdutrunc.", []>;
+def BCDTRUNC_rec : VX_VT5_VA5_VB5_PS1_XO9_o<257, "bcdtrunc." , []>;
+def BCDUTRUNC_rec : VX_VT5_VA5_VB5_XO9_o <321, "bcdutrunc.", []>;
// Absolute Difference
def VABSDUB : VXForm_1<1027, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 96b9c9a119c0..115bd44ea202 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -262,8 +262,8 @@ class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: DForm_base<opcode, OOL, IOL, asmstr, itin, pattern> {
- // Even though ADDICo does not really have an RC bit, provide
- // the declaration of one here so that isDOT has something to set.
+ // Even though ADDIC_rec does not really have an RC bit, provide
+ // the declaration of one here so that isRecordForm has something to set.
bit RC = 0;
}
@@ -428,7 +428,7 @@ class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asms
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = RST;
let Inst{11-15} = A;
@@ -463,7 +463,7 @@ class XForm_base_r3xo_swapped
bits<5> RST;
bits<5> B;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = RST;
let Inst{11-15} = A;
@@ -744,7 +744,7 @@ class XForm_42<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
: XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = RST;
let Inst{11-20} = 0;
@@ -757,7 +757,7 @@ class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
bits<5> FM;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = FM;
let Inst{11-20} = 0;
@@ -902,7 +902,7 @@ class XForm_htm2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
: I<opcode, OOL, IOL, asmstr, itin> {
bit L;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{7-9} = 0;
let Inst{10} = L;
@@ -1265,7 +1265,7 @@ class XX3Form_Rc<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = XT{4-0};
let Inst{11-15} = XA{4-0};
@@ -1529,6 +1529,29 @@ class XLForm_2_ext_and_DSForm_1<bits<6> opcode1, bits<10> xo1,
let BH = 0;
}
+class XLForm_2_ext_and_DForm_1<bits<6> opcode1, bits<10> xo1, bits<5> bo,
+ bits<5> bi, bit lk, bits<6> opcode2, dag OOL,
+ dag IOL, string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
+
+ bits<5> RST;
+ bits<21> D_RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = bo;
+ let Inst{11-15} = bi;
+ let Inst{16-18} = 0;
+ let Inst{19-20} = 0; // Unused (BH)
+ let Inst{21-30} = xo1;
+ let Inst{31} = lk;
+
+ let Inst{38-42} = RST;
+ let Inst{43-47} = D_RA{20-16}; // Base Register
+ let Inst{48-63} = D_RA{15-0}; // Displacement
+}
+
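The class above packs two 4-byte instructions into a single 64-bit Inst: bits 0-31 hold the XL-form branch and bits 32-63 the D-form load. A sketch of how such an encoding splits into the two emitted words, assuming the PPC convention that Inst{0} is the most significant bit (an illustration, not the MC emitter):

    #include <cstdint>
    // Split a fused 64-bit encoding into its two instruction words.
    static void splitPair(uint64_t Inst, uint32_t &First, uint32_t &Second) {
      First = static_cast<uint32_t>(Inst >> 32); // bits 0-31: the branch
      Second = static_cast<uint32_t>(Inst);      // bits 32-63: the load
    }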
// 1.7.8 XFX-Form
class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
@@ -1628,7 +1651,7 @@ class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
bits<8> FM;
bits<5> rT;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Pattern = pattern;
let Inst{6} = 0;
@@ -1647,7 +1670,7 @@ class XFLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
bit W;
bits<5> FRB;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Pattern = pattern;
let Inst{6} = L;
@@ -1666,7 +1689,7 @@ class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
bits<5> RS;
bits<6> SH;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Pattern = pattern;
let Inst{6-10} = RS;
@@ -1687,7 +1710,7 @@ class XOForm_1<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asms
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = RT;
let Inst{11-15} = RA;
@@ -1714,7 +1737,7 @@ class AForm_1<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = FRT;
let Inst{11-15} = FRA;
@@ -1774,7 +1797,7 @@ class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = RS;
let Inst{11-15} = RA;
@@ -1800,7 +1823,7 @@ class MDForm_1<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = RS;
let Inst{11-15} = RA;
@@ -1821,7 +1844,7 @@ class MDSForm_1<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = RS;
let Inst{11-15} = RA;
@@ -2083,7 +2106,7 @@ class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = FRT;
let Inst{11-15} = FRA;
@@ -2107,7 +2130,7 @@ class Z23Form_3<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = FRT;
let Inst{11-22} = idx;
@@ -2125,7 +2148,7 @@ class Z23Form_8<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
- bit RC = 0; // set by isDOT
+ bit RC = 0; // set by isRecordForm
let Inst{6-10} = VRT;
let Inst{11-14} = 0;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/llvm/lib/Target/PowerPC/PPCInstrHTM.td
index 104b57a70a2e..6cbf999ca73d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -36,7 +36,7 @@ def TEND : XForm_htm1 <31, 686,
def TABORT : XForm_base_r3xo <31, 910,
(outs), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
- []>, isDOT {
+ []>, isRecordForm {
let RST = 0;
let B = 0;
}
@@ -44,38 +44,38 @@ def TABORT : XForm_base_r3xo <31, 910,
def TABORTWC : XForm_base_r3xo <31, 782,
(outs), (ins u5imm:$RTS, gprc:$A, gprc:$B),
"tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>,
- isDOT;
+ isRecordForm;
def TABORTWCI : XForm_base_r3xo <31, 846,
(outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
"tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>,
- isDOT;
+ isRecordForm;
def TABORTDC : XForm_base_r3xo <31, 814,
(outs), (ins u5imm:$RTS, gprc:$A, gprc:$B),
"tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>,
- isDOT;
+ isRecordForm;
def TABORTDCI : XForm_base_r3xo <31, 878,
(outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
"tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>,
- isDOT;
+ isRecordForm;
def TSR : XForm_htm2 <31, 750,
(outs), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
- isDOT;
+ isRecordForm;
def TRECLAIM : XForm_base_r3xo <31, 942,
(outs), (ins gprc:$A), "treclaim. $A",
IIC_SprMTSPR, []>,
- isDOT {
+ isRecordForm {
let RST = 0;
let B = 0;
}
def TRECHKPT : XForm_base_r3xo <31, 1006,
(outs), (ins), "trechkpt.", IIC_SprMTSPR, []>,
- isDOT {
+ isRecordForm {
let RST = 0;
let A = 0;
let B = 0;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 6b10672965c9..30906a32b00c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -108,7 +108,7 @@ ScheduleHazardRecognizer *
PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
const ScheduleDAG *DAG) const {
unsigned Directive =
- static_cast<const PPCSubtarget *>(STI)->getDarwinDirective();
+ static_cast<const PPCSubtarget *>(STI)->getCPUDirective();
if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
const InstrItineraryData *II =
@@ -125,7 +125,7 @@ ScheduleHazardRecognizer *
PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
unsigned Directive =
- DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
// FIXME: Leaving this as-is until we have POWER9 scheduling info
if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8)
@@ -202,7 +202,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// On some cores, there is an additional delay between writing to a condition
// register, and using it from a branch.
- unsigned Directive = Subtarget.getDarwinDirective();
+ unsigned Directive = Subtarget.getCPUDirective();
switch (Directive) {
default: break;
case PPC::DIR_7400:
@@ -371,7 +371,7 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
MachineFunction &MF = *MI.getParent()->getParent();
// Normal instructions can be commuted the obvious way.
- if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMIo)
+ if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)
return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
// Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
// 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
@@ -391,7 +391,7 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
// Swap op1/op2
assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
- "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMIo.");
+ "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMI_rec.");
Register Reg0 = MI.getOperand(0).getReg();
Register Reg1 = MI.getOperand(1).getReg();
Register Reg2 = MI.getOperand(2).getReg();
@@ -469,7 +469,7 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
// This function is used for scheduling, and the nop wanted here is the type
// that terminates dispatch groups on the POWER cores.
- unsigned Directive = Subtarget.getDarwinDirective();
+ unsigned Directive = Subtarget.getCPUDirective();
unsigned Opcode;
switch (Directive) {
default: Opcode = PPC::NOP; break;
@@ -903,15 +903,15 @@ static unsigned getCRBitValue(unsigned CRBit) {
void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
// We can end up with self copies and similar things as a result of VSX copy
// legalization. Promote them here.
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (PPC::F8RCRegClass.contains(DestReg) &&
PPC::VSRCRegClass.contains(SrcReg)) {
- unsigned SuperReg =
- TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
+ MCRegister SuperReg =
+ TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
if (VSXSelfCopyCrash && SrcReg == SuperReg)
llvm_unreachable("nop VSX copy");
@@ -919,8 +919,8 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
DestReg = SuperReg;
} else if (PPC::F8RCRegClass.contains(SrcReg) &&
PPC::VSRCRegClass.contains(DestReg)) {
- unsigned SuperReg =
- TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
+ MCRegister SuperReg =
+ TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
if (VSXSelfCopyCrash && DestReg == SuperReg)
llvm_unreachable("nop VSX copy");
@@ -931,7 +931,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Different class register copy
if (PPC::CRBITRCRegClass.contains(SrcReg) &&
PPC::GPRCRegClass.contains(DestReg)) {
- unsigned CRReg = getCRFromCRBit(SrcReg);
+ MCRegister CRReg = getCRFromCRBit(SrcReg);
BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);
getKillRegState(KillSrc);
// Rotate the CR bit in the CR fields to be the least significant bit and
@@ -1587,22 +1587,6 @@ bool PPCInstrInfo::DefinesPredicate(MachineInstr &MI,
return Found;
}
-bool PPCInstrInfo::isPredicable(const MachineInstr &MI) const {
- unsigned OpC = MI.getOpcode();
- switch (OpC) {
- default:
- return false;
- case PPC::B:
- case PPC::BLR:
- case PPC::BLR8:
- case PPC::BCTR:
- case PPC::BCTR8:
- case PPC::BCTRL:
- case PPC::BCTRL8:
- return true;
- }
-}
-
bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
unsigned &SrcReg2, int &Mask,
int &Value) const {
@@ -1836,8 +1820,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
int NewOpC = -1;
int MIOpC = MI->getOpcode();
- if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8 ||
- MIOpC == PPC::ANDISo || MIOpC == PPC::ANDISo8)
+ if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec ||
+ MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec)
NewOpC = MIOpC;
else {
NewOpC = PPC::getRecordFormOpcode(MIOpC);
@@ -1943,9 +1927,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
// The mask value needs to be shifted right by 16 if we're emitting andis.
Mask >>= MBInLoHWord ? 0 : 16;
- NewOpC = MIOpC == PPC::RLWINM ?
- (MBInLoHWord ? PPC::ANDIo : PPC::ANDISo) :
- (MBInLoHWord ? PPC::ANDIo8 :PPC::ANDISo8);
+ NewOpC = MIOpC == PPC::RLWINM
+ ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
+ : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);
} else if (MRI->use_empty(GPRRes) && (ME == 31) &&
(ME - MB + 1 == SH) && (MB >= 16)) {
// If we are rotating by the exact number of bits as are in the mask
@@ -1953,7 +1937,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// that's just an andis. (as long as the GPR result has no uses).
Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
Mask >>= 16;
- NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDISo :PPC::ANDISo8;
+ NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
}
// If we've set the mask, we can transform.
if (Mask != ~0LLU) {
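The 16-bit right shift above reflects how the two record-form AND immediates are applied: andi. ANDs with the zero-extended immediate directly, while andis. ANDs with the immediate shifted left by 16, so a mask that lives in the high halfword must be pre-shifted. A self-contained check of that round trip (the mask value is an arbitrary example, not from this change):

    #include <cassert>
    #include <cstdint>
    int main() {
      uint32_t Mask = 0x00FF0000u;           // mask bits in the high halfword
      uint16_t Imm = uint16_t(Mask >> 16);   // immediate fed to andis.
      assert((uint32_t(Imm) << 16) == Mask); // andis. ANDs with Imm << 16
      return 0;
    }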
@@ -1966,7 +1950,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
int64_t MB = MI->getOperand(3).getImm();
if (MB >= 48) {
uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
- NewOpC = PPC::ANDIo8;
+ NewOpC = PPC::ANDI8_rec;
MI->RemoveOperand(3);
MI->getOperand(2).setImm(Mask);
NumRcRotatesConvertedToRcAnd++;
@@ -2306,7 +2290,7 @@ void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
// Replace the instruction.
if (LII.SetCR) {
- MI.setDesc(get(LII.Is64Bit ? PPC::ANDIo8 : PPC::ANDIo));
+ MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
// Set the immediate.
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine);
@@ -2370,15 +2354,13 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
ImmInstrInfo III;
unsigned Opc = MI.getOpcode();
bool ConvertibleImmForm =
- Opc == PPC::CMPWI || Opc == PPC::CMPLWI ||
- Opc == PPC::CMPDI || Opc == PPC::CMPLDI ||
- Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
- Opc == PPC::ORI || Opc == PPC::ORI8 ||
- Opc == PPC::XORI || Opc == PPC::XORI8 ||
- Opc == PPC::RLDICL || Opc == PPC::RLDICLo ||
- Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
- Opc == PPC::RLWINM || Opc == PPC::RLWINMo ||
- Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
+ Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI ||
+ Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
+ Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI ||
+ Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec ||
+ Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
+ Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 ||
+ Opc == PPC::RLWINM8_rec;
bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
? isVFRegister(MI.getOperand(0).getReg())
: false;
@@ -2527,6 +2509,225 @@ void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
"RegNo should be killed or dead");
}
+// This opt tries to convert the following imm form into an index form to
+// save an add for stack variables.
+// Returns false if no such pattern is found.
+//
+// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
+// ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
+// Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed)
+//
+// can be converted to:
+//
+// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
+// Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed)
+//
+// In order to eliminate the ADD instr, make sure that:
+// 1: (OffsetAddi + OffsetImm) must fit in a signed 16-bit immediate, since
+//    that sum becomes the immediate of the new ADDI instr and ADDI can only
+//    take an int16 Imm (a standalone sketch of this range check follows the
+//    function below).
+// 2: ToBeChangedReg must be killed in the ADD instr and have no other use
+//    between the ADDI and ADD instrs, since its original def in the ADDI
+//    will be changed in the new ADDI instr. There must also be no new def
+//    of it between the ADD and Imm instrs, as ToBeChangedReg is used in the
+//    Index instr.
+// 3: ToBeDeletedReg must be killed in the Imm instr and have no other use
+//    between the ADD and Imm instrs, since the ADD instr will be eliminated.
+// 4: ScaleReg must not be redefined between the ADD and Imm instrs, since it
+//    will be moved into the Index instr.
+bool PPCInstrInfo::foldFrameOffset(MachineInstr &MI) const {
+ MachineFunction *MF = MI.getParent()->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ bool PostRA = !MRI->isSSA();
+  // Do this opt after PEI, which runs after RA. The reason is that stack slot
+  // expansion in PEI may expose such opportunities, since it is in PEI that
+  // stack slot offsets to the frame base (OffsetAddi) are determined.
+ if (!PostRA)
+ return false;
+ unsigned ToBeDeletedReg = 0;
+ int64_t OffsetImm = 0;
+ unsigned XFormOpcode = 0;
+ ImmInstrInfo III;
+
+  // Check if the Imm instr meets the requirements.
+ if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
+ III))
+ return false;
+
+ bool OtherIntermediateUse = false;
+ MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);
+
+  // Exit if there is another use between the ADD and Imm instrs, or if no
+  // def was found.
+  if (OtherIntermediateUse || !ADDMI)
+    return false;
+
+  // Check if the ADD instr meets the requirements.
+ if (!isADDInstrEligibleForFolding(*ADDMI))
+ return false;
+
+ unsigned ScaleRegIdx = 0;
+ int64_t OffsetAddi = 0;
+ MachineInstr *ADDIMI = nullptr;
+
+ // Check if there is a valid ToBeChangedReg in ADDMI.
+ // 1: It must be killed.
+ // 2: Its definition must be a valid ADDIMI.
+  // 3: It must satisfy the int16 offset requirement.
+ if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
+ ScaleRegIdx = 2;
+ else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
+ ScaleRegIdx = 1;
+ else
+ return false;
+
+ assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
+ unsigned ToBeChangedReg = ADDIMI->getOperand(0).getReg();
+ unsigned ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
+ auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
+ MachineBasicBlock::iterator End) {
+ for (auto It = ++Start; It != End; It++)
+ if (It->modifiesRegister(Reg, &getRegisterInfo()))
+ return true;
+ return false;
+ };
+  // Make sure there is no other def of ToBeChangedReg or ScaleReg between the
+  // ADD instr and the Imm instr.
+ if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
+ return false;
+
+ // Now start to do the transformation.
+ LLVM_DEBUG(dbgs() << "Replace instruction: "
+ << "\n");
+ LLVM_DEBUG(ADDIMI->dump());
+ LLVM_DEBUG(ADDMI->dump());
+ LLVM_DEBUG(MI.dump());
+ LLVM_DEBUG(dbgs() << "with: "
+ << "\n");
+
+ // Update ADDI instr.
+ ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);
+
+ // Update Imm instr.
+ MI.setDesc(get(XFormOpcode));
+ MI.getOperand(III.ImmOpNo)
+ .ChangeToRegister(ScaleReg, false, false,
+ ADDMI->getOperand(ScaleRegIdx).isKill());
+
+ MI.getOperand(III.OpNoForForwarding)
+ .ChangeToRegister(ToBeChangedReg, false, false, true);
+
+ // Eliminate ADD instr.
+ ADDMI->eraseFromParent();
+
+ LLVM_DEBUG(ADDIMI->dump());
+ LLVM_DEBUG(MI.dump());
+
+ return true;
+}
+
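To make the int16 requirement concrete, here is a standalone sketch of the displacement-range check (hypothetical offset values; the bound mirrors llvm::isInt<16> rather than calling it):

    #include <cassert>
    #include <cstdint>
    // The folded displacement must fit ADDI's signed 16-bit immediate field.
    static bool fitsSImm16(int64_t V) { return V >= -32768 && V <= 32767; }
    int main() {
      assert(fitsSImm16(32000 + 700));  // 32700 is still encodable
      assert(!fitsSImm16(32000 + 800)); // 32800 overflows; the fold is rejected
      return 0;
    }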
+bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,
+ int64_t &Imm) const {
+ unsigned Opc = ADDIMI.getOpcode();
+
+ // Exit if the instruction is not ADDI.
+ if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
+ return false;
+
+ Imm = ADDIMI.getOperand(2).getImm();
+
+ return true;
+}
+
+bool PPCInstrInfo::isADDInstrEligibleForFolding(MachineInstr &ADDMI) const {
+ unsigned Opc = ADDMI.getOpcode();
+
+  // The instruction is eligible only if it is an ADD.
+ return Opc == PPC::ADD4 || Opc == PPC::ADD8;
+}
+
+bool PPCInstrInfo::isImmInstrEligibleForFolding(MachineInstr &MI,
+ unsigned &ToBeDeletedReg,
+ unsigned &XFormOpcode,
+ int64_t &OffsetImm,
+ ImmInstrInfo &III) const {
+ // Only handle load/store.
+ if (!MI.mayLoadOrStore())
+ return false;
+
+ unsigned Opc = MI.getOpcode();
+
+ XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc);
+
+ // Exit if instruction has no index form.
+ if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
+ return false;
+
+ // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
+ if (!instrHasImmForm(XFormOpcode, isVFRegister(MI.getOperand(0).getReg()),
+ III, true))
+ return false;
+
+ if (!III.IsSummingOperands)
+ return false;
+
+ MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo);
+ MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding);
+ // Only support imm operands, not relocation slots or others.
+ if (!ImmOperand.isImm())
+ return false;
+
+ assert(RegOperand.isReg() && "Instruction format is not right");
+
+  // If ToBeDeletedReg has other uses after the Imm instr, we cannot delete it.
+ if (!RegOperand.isKill())
+ return false;
+
+ ToBeDeletedReg = RegOperand.getReg();
+ OffsetImm = ImmOperand.getImm();
+
+ return true;
+}
+
+bool PPCInstrInfo::isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index,
+ MachineInstr *&ADDIMI,
+ int64_t &OffsetAddi,
+ int64_t OffsetImm) const {
+ assert((Index == 1 || Index == 2) && "Invalid operand index for add.");
+ MachineOperand &MO = ADDMI->getOperand(Index);
+
+ if (!MO.isKill())
+ return false;
+
+ bool OtherIntermediateUse = false;
+
+ ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse);
+  // Currently we handle only the single "add + Imm instr" pair case; exit if
+  // another intermediate use of ToBeChangedReg is found.
+  // TODO: handle the cases where there are other "add + Imm instr" pairs
+  // with the same offset in the Imm instr, such as:
+ //
+ // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
+ // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
+ // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed)
+ // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
+ // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed)
+ //
+ // can be converted to:
+ //
+ // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
+ // (OffsetAddi + OffsetImm)
+ // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg
+ // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed)
+
+ if (OtherIntermediateUse || !ADDIMI)
+ return false;
+  // Check if the ADDI instr meets the requirements.
+  if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))
+    return false;
+
+  return isInt<16>(OffsetAddi + OffsetImm);
+}
+
// If this instruction has an immediate form and one of its operands is a
// result of a load-immediate or an add-immediate, convert it to
// the immediate form if the constant is in range.
@@ -2660,34 +2861,34 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
return false;
}
case PPC::RLDICL:
- case PPC::RLDICLo:
+ case PPC::RLDICL_rec:
case PPC::RLDICL_32:
case PPC::RLDICL_32_64: {
// Use APInt's rotate function.
int64_t SH = MI.getOperand(2).getImm();
int64_t MB = MI.getOperand(3).getImm();
- APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICLo) ?
- 64 : 32, SExtImm, true);
+ APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
+ SExtImm, true);
InVal = InVal.rotl(SH);
uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
InVal &= Mask;
// Can't replace negative values with an LI as that will sign-extend
// and not clear the left bits. If we're setting the CR bit, we will use
- // ANDIo which won't sign extend, so that's safe.
+ // ANDI_rec which won't sign extend, so that's safe.
if (isUInt<15>(InVal.getSExtValue()) ||
- (Opc == PPC::RLDICLo && isUInt<16>(InVal.getSExtValue()))) {
+ (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
ReplaceWithLI = true;
Is64BitLI = Opc != PPC::RLDICL_32;
NewImm = InVal.getSExtValue();
- SetCR = Opc == PPC::RLDICLo;
+ SetCR = Opc == PPC::RLDICL_rec;
break;
}
return false;
}
case PPC::RLWINM:
case PPC::RLWINM8:
- case PPC::RLWINMo:
- case PPC::RLWINM8o: {
+ case PPC::RLWINM_rec:
+ case PPC::RLWINM8_rec: {
int64_t SH = MI.getOperand(2).getImm();
int64_t MB = MI.getOperand(3).getImm();
int64_t ME = MI.getOperand(4).getImm();
@@ -2698,15 +2899,15 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
InVal &= Mask;
// Can't replace negative values with an LI as that will sign-extend
// and not clear the left bits. If we're setting the CR bit, we will use
- // ANDIo which won't sign extend, so that's safe.
+ // ANDI_rec which won't sign extend, so that's safe.
bool ValueFits = isUInt<15>(InVal.getSExtValue());
- ValueFits |= ((Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o) &&
+ ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
isUInt<16>(InVal.getSExtValue()));
if (ValueFits) {
ReplaceWithLI = true;
- Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
+ Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
NewImm = InVal.getSExtValue();
- SetCR = Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o;
+ SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
break;
}
return false;
@@ -2768,7 +2969,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
LII.Is64Bit = Is64BitLI;
LII.SetCR = SetCR;
// If we're setting the CR, the original load-immediate must be kept (as an
- // operand to ANDIo/ANDI8o).
+ // operand to ANDI_rec/ANDI8_rec).
if (KilledDef && SetCR)
*KilledDef = nullptr;
replaceInstrWithLI(MI, LII);
@@ -2819,13 +3020,13 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
III.IsSummingOperands = true;
III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
break;
- case PPC::ADDCo:
+ case PPC::ADDC_rec:
III.SignedImm = true;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = true;
III.IsSummingOperands = true;
- III.ImmOpcode = PPC::ADDICo;
+ III.ImmOpcode = PPC::ADDIC_rec;
break;
case PPC::SUBFC:
case PPC::SUBFC8:
@@ -2851,8 +3052,8 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
III.IsCommutative = false;
III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
break;
- case PPC::ANDo:
- case PPC::AND8o:
+ case PPC::AND_rec:
+ case PPC::AND8_rec:
case PPC::OR:
case PPC::OR8:
case PPC::XOR:
@@ -2863,8 +3064,12 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
III.IsCommutative = true;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
- case PPC::ANDo: III.ImmOpcode = PPC::ANDIo; break;
- case PPC::AND8o: III.ImmOpcode = PPC::ANDIo8; break;
+ case PPC::AND_rec:
+ III.ImmOpcode = PPC::ANDI_rec;
+ break;
+ case PPC::AND8_rec:
+ III.ImmOpcode = PPC::ANDI8_rec;
+ break;
case PPC::OR: III.ImmOpcode = PPC::ORI; break;
case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
@@ -2873,18 +3078,18 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
break;
case PPC::RLWNM:
case PPC::RLWNM8:
- case PPC::RLWNMo:
- case PPC::RLWNM8o:
+ case PPC::RLWNM_rec:
+ case PPC::RLWNM8_rec:
case PPC::SLW:
case PPC::SLW8:
- case PPC::SLWo:
- case PPC::SLW8o:
+ case PPC::SLW_rec:
+ case PPC::SLW8_rec:
case PPC::SRW:
case PPC::SRW8:
- case PPC::SRWo:
- case PPC::SRW8o:
+ case PPC::SRW_rec:
+ case PPC::SRW8_rec:
case PPC::SRAW:
- case PPC::SRAWo:
+ case PPC::SRAW_rec:
III.SignedImm = false;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
@@ -2894,8 +3099,8 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
// This does not apply to shift right algebraic because a value
// out of range will produce a -1/0.
III.ImmWidth = 16;
- if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 ||
- Opc == PPC::RLWNMo || Opc == PPC::RLWNM8o)
+ if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec ||
+ Opc == PPC::RLWNM8_rec)
III.TruncateImmTo = 5;
else
III.TruncateImmTo = 6;
@@ -2903,38 +3108,50 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
default: llvm_unreachable("Unknown opcode");
case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
- case PPC::RLWNMo: III.ImmOpcode = PPC::RLWINMo; break;
- case PPC::RLWNM8o: III.ImmOpcode = PPC::RLWINM8o; break;
+ case PPC::RLWNM_rec:
+ III.ImmOpcode = PPC::RLWINM_rec;
+ break;
+ case PPC::RLWNM8_rec:
+ III.ImmOpcode = PPC::RLWINM8_rec;
+ break;
case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
- case PPC::SLWo: III.ImmOpcode = PPC::RLWINMo; break;
- case PPC::SLW8o: III.ImmOpcode = PPC::RLWINM8o; break;
+ case PPC::SLW_rec:
+ III.ImmOpcode = PPC::RLWINM_rec;
+ break;
+ case PPC::SLW8_rec:
+ III.ImmOpcode = PPC::RLWINM8_rec;
+ break;
case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
- case PPC::SRWo: III.ImmOpcode = PPC::RLWINMo; break;
- case PPC::SRW8o: III.ImmOpcode = PPC::RLWINM8o; break;
+ case PPC::SRW_rec:
+ III.ImmOpcode = PPC::RLWINM_rec;
+ break;
+ case PPC::SRW8_rec:
+ III.ImmOpcode = PPC::RLWINM8_rec;
+ break;
case PPC::SRAW:
III.ImmWidth = 5;
III.TruncateImmTo = 0;
III.ImmOpcode = PPC::SRAWI;
break;
- case PPC::SRAWo:
+ case PPC::SRAW_rec:
III.ImmWidth = 5;
III.TruncateImmTo = 0;
- III.ImmOpcode = PPC::SRAWIo;
+ III.ImmOpcode = PPC::SRAWI_rec;
break;
}
break;
case PPC::RLDCL:
- case PPC::RLDCLo:
+ case PPC::RLDCL_rec:
case PPC::RLDCR:
- case PPC::RLDCRo:
+ case PPC::RLDCR_rec:
case PPC::SLD:
- case PPC::SLDo:
+ case PPC::SLD_rec:
case PPC::SRD:
- case PPC::SRDo:
+ case PPC::SRD_rec:
case PPC::SRAD:
- case PPC::SRADo:
+ case PPC::SRAD_rec:
III.SignedImm = false;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
@@ -2944,30 +3161,38 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
// This does not apply to shift right algebraic because a value
// out of range will produce a -1/0.
III.ImmWidth = 16;
- if (Opc == PPC::RLDCL || Opc == PPC::RLDCLo ||
- Opc == PPC::RLDCR || Opc == PPC::RLDCRo)
+ if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR ||
+ Opc == PPC::RLDCR_rec)
III.TruncateImmTo = 6;
else
III.TruncateImmTo = 7;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
- case PPC::RLDCLo: III.ImmOpcode = PPC::RLDICLo; break;
+ case PPC::RLDCL_rec:
+ III.ImmOpcode = PPC::RLDICL_rec;
+ break;
case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
- case PPC::RLDCRo: III.ImmOpcode = PPC::RLDICRo; break;
+ case PPC::RLDCR_rec:
+ III.ImmOpcode = PPC::RLDICR_rec;
+ break;
case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
- case PPC::SLDo: III.ImmOpcode = PPC::RLDICRo; break;
+ case PPC::SLD_rec:
+ III.ImmOpcode = PPC::RLDICR_rec;
+ break;
case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
- case PPC::SRDo: III.ImmOpcode = PPC::RLDICLo; break;
+ case PPC::SRD_rec:
+ III.ImmOpcode = PPC::RLDICL_rec;
+ break;
case PPC::SRAD:
III.ImmWidth = 6;
III.TruncateImmTo = 0;
III.ImmOpcode = PPC::SRADI;
break;
- case PPC::SRADo:
+ case PPC::SRAD_rec:
III.ImmWidth = 6;
III.TruncateImmTo = 0;
- III.ImmOpcode = PPC::SRADIo;
+ III.ImmOpcode = PPC::SRADI_rec;
break;
}
break;
@@ -3538,14 +3763,16 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
ForwardKilledOperandReg = MI.getOperand(ConstantOpNo).getReg();
unsigned Opc = MI.getOpcode();
- bool SpecialShift32 =
- Opc == PPC::SLW || Opc == PPC::SLWo || Opc == PPC::SRW || Opc == PPC::SRWo;
- bool SpecialShift64 =
- Opc == PPC::SLD || Opc == PPC::SLDo || Opc == PPC::SRD || Opc == PPC::SRDo;
- bool SetCR = Opc == PPC::SLWo || Opc == PPC::SRWo ||
- Opc == PPC::SLDo || Opc == PPC::SRDo;
- bool RightShift =
- Opc == PPC::SRW || Opc == PPC::SRWo || Opc == PPC::SRD || Opc == PPC::SRDo;
+ bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec ||
+ Opc == PPC::SRW || Opc == PPC::SRW_rec ||
+ Opc == PPC::SLW8 || Opc == PPC::SLW8_rec ||
+ Opc == PPC::SRW8 || Opc == PPC::SRW8_rec;
+ bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec ||
+ Opc == PPC::SRD || Opc == PPC::SRD_rec;
+ bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec ||
+ Opc == PPC::SLD_rec || Opc == PPC::SRD_rec;
+ bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD ||
+ Opc == PPC::SRD_rec;
MI.setDesc(get(III.ImmOpcode));
if (ConstantOpNo == III.OpNoForForwarding) {
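For context on the SpecialShift flags above: the 32-bit shift instructions consult six bits of the shift amount, so amounts 32-63 produce zero rather than wrapping, and that case needs explicit handling once the amount becomes an immediate. A reference model under the standard srw semantics (an illustration, not code from this change):

    #include <cstdint>
    // srw: amounts 32..63 yield 0, unlike C++'s undefined oversized shifts.
    static uint32_t ppc_srw(uint32_t V, uint32_t Amt) {
      Amt &= 63;
      return Amt > 31 ? 0u : V >> Amt;
    }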
@@ -3649,27 +3876,21 @@ int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) {
// i.e. bits 0 through 31 are the same as bit 32.
static bool isSignExtendingOp(const MachineInstr &MI) {
int Opcode = MI.getOpcode();
- if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
- Opcode == PPC::LIS || Opcode == PPC::LIS8 ||
- Opcode == PPC::SRAW || Opcode == PPC::SRAWo ||
- Opcode == PPC::SRAWI || Opcode == PPC::SRAWIo ||
- Opcode == PPC::LWA || Opcode == PPC::LWAX ||
- Opcode == PPC::LWA_32 || Opcode == PPC::LWAX_32 ||
- Opcode == PPC::LHA || Opcode == PPC::LHAX ||
- Opcode == PPC::LHA8 || Opcode == PPC::LHAX8 ||
- Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
- Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
- Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
- Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 ||
- Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
- Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
- Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
- Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 ||
- Opcode == PPC::EXTSB || Opcode == PPC::EXTSBo ||
- Opcode == PPC::EXTSH || Opcode == PPC::EXTSHo ||
- Opcode == PPC::EXTSB8 || Opcode == PPC::EXTSH8 ||
- Opcode == PPC::EXTSW || Opcode == PPC::EXTSWo ||
- Opcode == PPC::SETB || Opcode == PPC::SETB8 ||
+ if (Opcode == PPC::LI || Opcode == PPC::LI8 || Opcode == PPC::LIS ||
+ Opcode == PPC::LIS8 || Opcode == PPC::SRAW || Opcode == PPC::SRAW_rec ||
+ Opcode == PPC::SRAWI || Opcode == PPC::SRAWI_rec || Opcode == PPC::LWA ||
+ Opcode == PPC::LWAX || Opcode == PPC::LWA_32 || Opcode == PPC::LWAX_32 ||
+ Opcode == PPC::LHA || Opcode == PPC::LHAX || Opcode == PPC::LHA8 ||
+ Opcode == PPC::LHAX8 || Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
+ Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || Opcode == PPC::LBZU ||
+ Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 ||
+ Opcode == PPC::LHZ || Opcode == PPC::LHZX || Opcode == PPC::LHZ8 ||
+ Opcode == PPC::LHZX8 || Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
+ Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 || Opcode == PPC::EXTSB ||
+ Opcode == PPC::EXTSB_rec || Opcode == PPC::EXTSH ||
+ Opcode == PPC::EXTSH_rec || Opcode == PPC::EXTSB8 ||
+ Opcode == PPC::EXTSH8 || Opcode == PPC::EXTSW ||
+ Opcode == PPC::EXTSW_rec || Opcode == PPC::SETB || Opcode == PPC::SETB8 ||
Opcode == PPC::EXTSH8_32_64 || Opcode == PPC::EXTSW_32_64 ||
Opcode == PPC::EXTSB8_32_64)
return true;
@@ -3677,8 +3898,8 @@ static bool isSignExtendingOp(const MachineInstr &MI) {
if (Opcode == PPC::RLDICL && MI.getOperand(3).getImm() >= 33)
return true;
- if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo ||
- Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo) &&
+ if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
+ Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) &&
MI.getOperand(3).getImm() > 0 &&
MI.getOperand(3).getImm() <= MI.getOperand(4).getImm())
return true;
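The MB/ME test just above implies sign extension because, in PPC's MSB-first bit numbering, MB > 0 means the rotate-and-mask always clears bit 0, so the 32-bit result is non-negative and already agrees with its 64-bit extension. A small check of that mask property (an illustration, not code from this change):

    #include <cassert>
    #include <cstdint>
    // Build the 32-bit mask covering bits MB..ME, PPC (MSB-first) numbering.
    static uint32_t maskMBME(unsigned MB, unsigned ME) {
      uint32_t M = 0;
      for (unsigned I = MB; I <= ME; ++I)
        M |= 1u << (31 - I);
      return M;
    }
    int main() {
      assert((maskMBME(1, 31) & 0x80000000u) == 0); // bit 0 clear => MSB is 0
      return 0;
    }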
@@ -3701,52 +3922,46 @@ static bool isZeroExtendingOp(const MachineInstr &MI) {
// We have some variations of rotate-and-mask instructions
// that clear the higher 32 bits.
- if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
- Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo ||
+ if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
+ Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec ||
Opcode == PPC::RLDICL_32_64) &&
MI.getOperand(3).getImm() >= 32)
return true;
- if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
+ if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
MI.getOperand(3).getImm() >= 32 &&
MI.getOperand(3).getImm() <= 63 - MI.getOperand(2).getImm())
return true;
- if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo ||
- Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo ||
+ if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
+ Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
MI.getOperand(3).getImm() <= MI.getOperand(4).getImm())
return true;
// There are other instructions that clear the higher 32 bits.
- if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo ||
- Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo ||
+ if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
+ Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8 ||
- Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo ||
- Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo ||
- Opcode == PPC::POPCNTD || Opcode == PPC::POPCNTW ||
- Opcode == PPC::SLW || Opcode == PPC::SLWo ||
- Opcode == PPC::SRW || Opcode == PPC::SRWo ||
- Opcode == PPC::SLW8 || Opcode == PPC::SRW8 ||
- Opcode == PPC::SLWI || Opcode == PPC::SLWIo ||
- Opcode == PPC::SRWI || Opcode == PPC::SRWIo ||
- Opcode == PPC::LWZ || Opcode == PPC::LWZX ||
- Opcode == PPC::LWZU || Opcode == PPC::LWZUX ||
- Opcode == PPC::LWBRX || Opcode == PPC::LHBRX ||
- Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
- Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
- Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
- Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
- Opcode == PPC::LWZ8 || Opcode == PPC::LWZX8 ||
- Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8 ||
- Opcode == PPC::LWBRX8 || Opcode == PPC::LHBRX8 ||
- Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
- Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 ||
- Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
- Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 ||
- Opcode == PPC::ANDIo || Opcode == PPC::ANDISo ||
- Opcode == PPC::ROTRWI || Opcode == PPC::ROTRWIo ||
- Opcode == PPC::EXTLWI || Opcode == PPC::EXTLWIo ||
+ Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
+ Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec ||
+ Opcode == PPC::POPCNTD || Opcode == PPC::POPCNTW || Opcode == PPC::SLW ||
+ Opcode == PPC::SLW_rec || Opcode == PPC::SRW || Opcode == PPC::SRW_rec ||
+ Opcode == PPC::SLW8 || Opcode == PPC::SRW8 || Opcode == PPC::SLWI ||
+ Opcode == PPC::SLWI_rec || Opcode == PPC::SRWI ||
+ Opcode == PPC::SRWI_rec || Opcode == PPC::LWZ || Opcode == PPC::LWZX ||
+ Opcode == PPC::LWZU || Opcode == PPC::LWZUX || Opcode == PPC::LWBRX ||
+ Opcode == PPC::LHBRX || Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
+ Opcode == PPC::LHZU || Opcode == PPC::LHZUX || Opcode == PPC::LBZ ||
+ Opcode == PPC::LBZX || Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
+ Opcode == PPC::LWZ8 || Opcode == PPC::LWZX8 || Opcode == PPC::LWZU8 ||
+ Opcode == PPC::LWZUX8 || Opcode == PPC::LWBRX8 || Opcode == PPC::LHBRX8 ||
+ Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 || Opcode == PPC::LHZU8 ||
+ Opcode == PPC::LHZUX8 || Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
+ Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 ||
+ Opcode == PPC::ANDI_rec || Opcode == PPC::ANDIS_rec ||
+ Opcode == PPC::ROTRWI || Opcode == PPC::ROTRWI_rec ||
+ Opcode == PPC::EXTLWI || Opcode == PPC::EXTLWI_rec ||
Opcode == PPC::MFVSRWZ)
return true;
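ANDI_rec and ANDIS_rec belong in the zero-extending list because their 16-bit immediates are zero-extended before the AND, so bits 32-63 of the 64-bit result are necessarily clear. A reference model under the ISA's andi. semantics (an illustration, not code from this change):

    #include <cstdint>
    // andi.: the UI field is zero-extended, so the upper mask bits are zero.
    static uint64_t ppc_andi(uint64_t RS, uint16_t UI) {
      return RS & static_cast<uint64_t>(UI);
    }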
@@ -3840,14 +4055,14 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
return false;
}
- case PPC::ANDIo:
- case PPC::ANDISo:
+ case PPC::ANDI_rec:
+ case PPC::ANDIS_rec:
case PPC::ORI:
case PPC::ORIS:
case PPC::XORI:
case PPC::XORIS:
- case PPC::ANDIo8:
- case PPC::ANDISo8:
+ case PPC::ANDI8_rec:
+ case PPC::ANDIS8_rec:
case PPC::ORI8:
case PPC::ORIS8:
case PPC::XORI8:
@@ -4042,12 +4257,10 @@ MachineInstr *PPCInstrInfo::findLoopInstr(
// Return true if we can get the base operand, byte offset, and memory width of
// an instruction. Width is the size of memory that is being loaded/stored.
bool PPCInstrInfo::getMemOperandWithOffsetWidth(
- const MachineInstr &LdSt,
- const MachineOperand *&BaseReg,
- int64_t &Offset,
- unsigned &Width,
- const TargetRegisterInfo *TRI) const {
- assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+ const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
+ unsigned &Width, const TargetRegisterInfo *TRI) const {
+ if (!LdSt.mayLoadOrStore())
+ return false;
// Handle only loads/stores with base register followed by immediate offset.
if (LdSt.getNumExplicitOperands() != 3)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 19ab30cb0908..2fe8df0e1d68 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -280,7 +280,7 @@ public:
unsigned FalseReg) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -346,8 +346,6 @@ public:
bool DefinesPredicate(MachineInstr &MI,
std::vector<MachineOperand> &Pred) const override;
- bool isPredicable(const MachineInstr &MI) const override;
-
// Comparison optimization.
bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
@@ -422,6 +420,16 @@ public:
bool convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef = nullptr) const;
+ bool foldFrameOffset(MachineInstr &MI) const;
+ bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const;
+ bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const;
+ bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg,
+ unsigned &XFormOpcode,
+ int64_t &OffsetOfImmInstr,
+ ImmInstrInfo &III) const;
+ bool isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index,
+ MachineInstr *&ADDIMI, int64_t &OffsetAddi,
+ int64_t OffsetImm) const;
/// Fixup killed/dead flag for register \p RegNo between instructions [\p
/// StartMI, \p EndMI]. Some PostRA transformations may violate register
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 24183277519b..b38ca3af63f5 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -61,10 +61,6 @@ def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
]>;
-def SDT_PPCVecReverse: SDTypeProfile<1, 1, [ SDTCisVec<0>,
- SDTCisVec<1>
-]>;
-
def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
]>;
@@ -117,6 +113,10 @@ def SDT_PPCextswsli : SDTypeProfile<1, 2, [ // extswsli
SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisInt<2>
]>;
+def SDT_PPCFPMinMax : SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>
+]>;
+
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
@@ -165,7 +165,8 @@ def PPCfsel : SDNode<"PPCISD::FSEL",
// Type constraint for fsel.
SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
-
+def PPCxsmaxc : SDNode<"PPCISD::XSMAXCDP", SDT_PPCFPMinMax, []>;
+def PPCxsminc : SDNode<"PPCISD::XSMINCDP", SDT_PPCFPMinMax, []>;
def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp,
@@ -199,7 +200,6 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>;
-def PPCxxreverse : SDNode<"PPCISD::XXREVERSE", SDT_PPCVecReverse, []>;
def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
@@ -486,7 +486,7 @@ def mul_without_simm16 : BinOpWithoutSImm16Operand<mul>;
// PowerPC Flag Definitions.
class isPPC64 { bit PPC64 = 1; }
-class isDOT { bit RC = 1; }
+class isRecordForm { bit RC = 1; }
class RegConstraint<string C> {
string Constraints = C;
@@ -961,9 +961,9 @@ multiclass XForm_6r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : XForm_6<opcode, xo, OOL, IOL,
+ def _rec : XForm_6<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -976,9 +976,9 @@ multiclass XForm_6rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CARRY, CR0] in
- def o : XForm_6<opcode, xo, OOL, IOL,
+ def _rec : XForm_6<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -991,9 +991,9 @@ multiclass XForm_10rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CARRY, CR0] in
- def o : XForm_10<opcode, xo, OOL, IOL,
+ def _rec : XForm_10<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1005,9 +1005,9 @@ multiclass XForm_11r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : XForm_11<opcode, xo, OOL, IOL,
+ def _rec : XForm_11<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1019,9 +1019,35 @@ multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ def _rec : XOForm_1<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
+ }
+}
+
+// Multiclass for instructions which have a record overflow form as well
+// as a record form but no carry (e.g. mulld, mulldo, subf, subfo, etc.).
+multiclass XOForm_1rx<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XOForm_1<opcode, xo, 0, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def _rec : XOForm_1<opcode, xo, 0, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isRecordForm, RecFormRel;
+ }
+ let BaseName = !strconcat(asmbase, "O") in {
+ let Defs = [XER] in
+ def O : XOForm_1<opcode, xo, 1, OOL, IOL,
+ !strconcat(asmbase, !strconcat("o ", asmstr)), itin,
+ []>, RecFormRel;
+ let Defs = [XER, CR0] in
+ def O_rec : XOForm_1<opcode, xo, 1, OOL, IOL,
+ !strconcat(asmbase, !strconcat("o. ", asmstr)), itin,
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1035,11 +1061,21 @@ multiclass XOForm_1rcr<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ def _rec : XOForm_1<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel, PPC970_DGroup_First,
+ []>, isRecordForm, RecFormRel, PPC970_DGroup_First,
PPC970_DGroup_Cracked;
}
+ let BaseName = !strconcat(asmbase, "O") in {
+ let Defs = [XER] in
+ def O : XOForm_1<opcode, xo, 1, OOL, IOL,
+ !strconcat(asmbase, !strconcat("o ", asmstr)), itin,
+ []>, RecFormRel;
+ let Defs = [XER, CR0] in
+ def O_rec : XOForm_1<opcode, xo, 1, OOL, IOL,
+ !strconcat(asmbase, !strconcat("o. ", asmstr)), itin,
+ []>, isRecordForm, RecFormRel;
+ }
}
multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
@@ -1051,9 +1087,19 @@ multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CARRY, CR0] in
- def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ def _rec : XOForm_1<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
+ }
+ let BaseName = !strconcat(asmbase, "O") in {
+ let Defs = [CARRY, XER] in
+ def O : XOForm_1<opcode, xo, 1, OOL, IOL,
+ !strconcat(asmbase, !strconcat("o ", asmstr)), itin,
+ []>, RecFormRel;
+ let Defs = [CARRY, XER, CR0] in
+ def O_rec : XOForm_1<opcode, xo, 1, OOL, IOL,
+ !strconcat(asmbase, !strconcat("o. ", asmstr)), itin,
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1065,9 +1111,19 @@ multiclass XOForm_3r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : XOForm_3<opcode, xo, oe, OOL, IOL,
+ def _rec : XOForm_3<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
+ }
+ let BaseName = !strconcat(asmbase, "O") in {
+ let Defs = [XER] in
+ def O : XOForm_3<opcode, xo, 1, OOL, IOL,
+ !strconcat(asmbase, !strconcat("o ", asmstr)), itin,
+ []>, RecFormRel;
+ let Defs = [XER, CR0] in
+ def O_rec : XOForm_3<opcode, xo, 1, OOL, IOL,
+ !strconcat(asmbase, !strconcat("o. ", asmstr)), itin,
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1080,9 +1136,19 @@ multiclass XOForm_3rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CARRY, CR0] in
- def o : XOForm_3<opcode, xo, oe, OOL, IOL,
+ def _rec : XOForm_3<opcode, xo, oe, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
+ }
+ let BaseName = !strconcat(asmbase, "O") in {
+ let Defs = [CARRY, XER] in
+ def O : XOForm_3<opcode, xo, 1, OOL, IOL,
+ !strconcat(asmbase, !strconcat("o ", asmstr)), itin,
+ []>, RecFormRel;
+ let Defs = [CARRY, XER, CR0] in
+ def O_rec : XOForm_3<opcode, xo, 1, OOL, IOL,
+ !strconcat(asmbase, !strconcat("o. ", asmstr)), itin,
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1094,9 +1160,9 @@ multiclass MForm_2r<bits<6> opcode, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : MForm_2<opcode, OOL, IOL,
+ def _rec : MForm_2<opcode, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1108,9 +1174,9 @@ multiclass MDForm_1r<bits<6> opcode, bits<3> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : MDForm_1<opcode, xo, OOL, IOL,
+ def _rec : MDForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1122,9 +1188,9 @@ multiclass MDSForm_1r<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : MDSForm_1<opcode, xo, OOL, IOL,
+ def _rec : MDSForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1137,9 +1203,9 @@ multiclass XSForm_1rc<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CARRY, CR0] in
- def o : XSForm_1<opcode, xo, OOL, IOL,
+ def _rec : XSForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1151,9 +1217,9 @@ multiclass XSForm_1r<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR0] in
- def o : XSForm_1<opcode, xo, OOL, IOL,
+ def _rec : XSForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1165,9 +1231,9 @@ multiclass XForm_26r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR1] in
- def o : XForm_26<opcode, xo, OOL, IOL,
+ def _rec : XForm_26<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1179,9 +1245,9 @@ multiclass XForm_28r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR1] in
- def o : XForm_28<opcode, xo, OOL, IOL,
+ def _rec : XForm_28<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1193,9 +1259,9 @@ multiclass AForm_1r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR1] in
- def o : AForm_1<opcode, xo, OOL, IOL,
+ def _rec : AForm_1<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1207,9 +1273,9 @@ multiclass AForm_2r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR1] in
- def o : AForm_2<opcode, xo, OOL, IOL,
+ def _rec : AForm_2<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1221,9 +1287,9 @@ multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
pattern>, RecFormRel;
let Defs = [CR1] in
- def o : AForm_3<opcode, xo, OOL, IOL,
+ def _rec : AForm_3<opcode, xo, OOL, IOL,
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
}
@@ -1324,12 +1390,13 @@ def RESTORE_CRBIT : PPCEmitTimePseudo<(outs crbitrc:$cond), (ins memri:$F),
}
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
- let isReturn = 1, Uses = [LR, RM] in
+ let isPredicable = 1, isReturn = 1, Uses = [LR, RM] in
def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB,
[(retflag)]>, Requires<[In32BitMode]>;
let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in {
- def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
- []>;
+ let isPredicable = 1 in
+ def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>;
let isCodeGenOnly = 1 in {
def BCCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
@@ -1362,9 +1429,10 @@ let Defs = [LR] in
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
let isBarrier = 1 in {
- def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
- "b $dst", IIC_BrB,
- [(br bb:$dst)]>;
+ let isPredicable = 1 in
+ def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
+ "b $dst", IIC_BrB,
+ [(br bb:$dst)]>;
def BA : IForm<18, 1, 0, (outs), (ins absdirectbrtarget:$dst),
"ba $dst", IIC_BrB, []>;
}
@@ -1485,9 +1553,10 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
}
}
let Uses = [CTR, RM] in {
- def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
- "bctrl", IIC_BrB, [(PPCbctrl)]>,
- Requires<[In32BitMode]>;
+ let isPredicable = 1 in
+ def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", IIC_BrB, [(PPCbctrl)]>,
+ Requires<[In32BitMode]>;
let isCodeGenOnly = 1 in {
def BCCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
@@ -1574,6 +1643,15 @@ def TCRETURNri : PPCEmitTimePseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
"#TC_RETURNr $dst $offset",
[]>;
+let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
+ Defs = [LR, R2], Uses = [CTR, RM], RST = 2 in {
+ def BCTRL_LWZinto_toc:
+ XLForm_2_ext_and_DForm_1<19, 528, 20, 0, 1, 32, (outs),
+ (ins memri:$src), "bctrl\n\tlwz 2, $src", IIC_BrB,
+ [(PPCbctrl_load_toc iaddr:$src)]>, Requires<[In32BitMode]>;
+
+}
+
let isCodeGenOnly = 1 in {
@@ -1839,15 +1917,15 @@ def LWARX : XForm_1_memOp<31, 20, (outs gprc:$rD), (ins memrr:$src),
// Instructions to support lock versions of atomics
// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
def LBARXL : XForm_1_memOp<31, 52, (outs gprc:$rD), (ins memrr:$src),
- "lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
+ "lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isRecordForm,
Requires<[HasPartwordAtomics]>;
def LHARXL : XForm_1_memOp<31, 116, (outs gprc:$rD), (ins memrr:$src),
- "lharx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
+ "lharx $rD, $src, 1", IIC_LdStLWARX, []>, isRecordForm,
Requires<[HasPartwordAtomics]>;
def LWARXL : XForm_1_memOp<31, 20, (outs gprc:$rD), (ins memrr:$src),
- "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT;
+ "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isRecordForm;
// The atomic instructions use the destination register as well as the next one
// or two registers in order (modulo 31).
@@ -1860,14 +1938,14 @@ def LWAT : X_RD5_RS5_IM5<31, 582, (outs gprc:$rD), (ins gprc:$rA, u5imm:$FC),
let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
def STBCX : XForm_1_memOp<31, 694, (outs), (ins gprc:$rS, memrr:$dst),
"stbcx. $rS, $dst", IIC_LdStSTWCX, []>,
- isDOT, Requires<[HasPartwordAtomics]>;
+ isRecordForm, Requires<[HasPartwordAtomics]>;
def STHCX : XForm_1_memOp<31, 726, (outs), (ins gprc:$rS, memrr:$dst),
"sthcx. $rS, $dst", IIC_LdStSTWCX, []>,
- isDOT, Requires<[HasPartwordAtomics]>;
+ isRecordForm, Requires<[HasPartwordAtomics]>;
def STWCX : XForm_1_memOp<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
- "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT;
+ "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isRecordForm;
}
let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
@@ -2034,6 +2112,7 @@ def LFIWZX : XForm_25_memOp<31, 887, (outs f8rc:$frD), (ins memrr:$src),
}
// Load Multiple
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
"lmw $rD, $src", IIC_LdStLMW, []>;
@@ -2188,6 +2267,7 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
}
// Store Multiple
+let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
"stmw $rS, $dst", IIC_LdStLMW, []>;
@@ -2221,9 +2301,9 @@ def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
[(set i32:$rD, (addc i32:$rA, imm32SExt16:$imm))]>,
RecFormRel, PPC970_DGroup_Cracked;
let Defs = [CARRY, CR0] in
-def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
+def ADDIC_rec : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
"addic. $rD, $rA, $imm", IIC_IntGeneral,
- []>, isDOT, RecFormRel;
+ []>, isRecordForm, RecFormRel;
}
def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s17imm:$imm),
"addis $rD, $rA, $imm", IIC_IntSimple,
@@ -2253,14 +2333,14 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
let PPC970_Unit = 1 in { // FXU Operations.
let Defs = [CR0] in {
-def ANDIo : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
+def ANDI_rec : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"andi. $dst, $src1, $src2", IIC_IntGeneral,
[(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>,
- isDOT;
-def ANDISo : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
+ isRecordForm;
+def ANDIS_rec : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"andis. $dst, $src1, $src2", IIC_IntGeneral,
[(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>,
- isDOT;
+ isRecordForm;
}
def ORI : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"ori $dst, $src1, $src2", IIC_IntSimple,
@@ -2621,6 +2701,7 @@ def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
PPC970_DGroup_First, PPC970_Unit_FXU;
}
+let hasSideEffects = 0 in {
let Defs = [LR] in {
def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS),
"mtlr $rS", IIC_SprMTSPR>,
@@ -2631,6 +2712,7 @@ def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins),
"mflr $rT", IIC_SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
+}
let isCodeGenOnly = 1 in {
// Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed
@@ -2732,8 +2814,8 @@ let Uses = [RM] in {
PPC970_DGroup_Single, PPC970_Unit_FPU;
let Defs = [CR1] in
- def MFFSo : XForm_42<63, 583, (outs f8rc:$rT), (ins),
- "mffs. $rT", IIC_IntMFFS, []>, isDOT;
+ def MFFS_rec : XForm_42<63, 583, (outs f8rc:$rT), (ins),
+ "mffs. $rT", IIC_IntMFFS, []>, isRecordForm;
def MFFSCE : X_FRT5_XO2_XO3_XO10<63, 0, 1, 583, (outs f8rc:$rT), (ins),
"mffsce $rT", IIC_IntMFFS, []>,
@@ -2778,9 +2860,9 @@ def MODUW : XForm_8<31, 267, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations.
// XO-Form instructions. Arithmetic instructions that can set overflow bit
let isCommutable = 1 in
-defm ADD4 : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "add", "$rT, $rA, $rB", IIC_IntSimple,
- [(set i32:$rT, (add i32:$rA, i32:$rB))]>;
+defm ADD4 : XOForm_1rx<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "add", "$rT, $rA, $rB", IIC_IntSimple,
+ [(set i32:$rT, (add i32:$rA, i32:$rB))]>;
let isCodeGenOnly = 1 in
def ADD4TLS : XOForm_1<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, tlsreg32:$rB),
"add $rT, $rA, $rB", IIC_IntSimple,
@@ -2797,24 +2879,14 @@ defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
defm DIVWU : XOForm_1rcr<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"divwu", "$rT, $rA, $rB", IIC_IntDivW,
[(set i32:$rT, (udiv i32:$rA, i32:$rB))]>;
-def DIVWE : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divwe $rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>,
- Requires<[HasExtDiv]>;
-let Defs = [CR0] in
-def DIVWEo : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divwe. $rT, $rA, $rB", IIC_IntDivW,
- []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
- Requires<[HasExtDiv]>;
-def DIVWEU : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divweu $rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>,
- Requires<[HasExtDiv]>;
-let Defs = [CR0] in
-def DIVWEUo : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divweu. $rT, $rA, $rB", IIC_IntDivW,
- []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
- Requires<[HasExtDiv]>;
+defm DIVWE : XOForm_1rcr<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwe", "$rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>,
+ Requires<[HasExtDiv]>;
+defm DIVWEU : XOForm_1rcr<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divweu", "$rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>,
+ Requires<[HasExtDiv]>;
let isCommutable = 1 in {
defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"mulhw", "$rT, $rA, $rB", IIC_IntMulHW,
@@ -2822,13 +2894,13 @@ defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"mulhwu", "$rT, $rA, $rB", IIC_IntMulHWU,
[(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
-defm MULLW : XOForm_1r<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "mullw", "$rT, $rA, $rB", IIC_IntMulHW,
- [(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
+defm MULLW : XOForm_1rx<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "mullw", "$rT, $rA, $rB", IIC_IntMulHW,
+ [(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
} // isCommutable
-defm SUBF : XOForm_1r<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "subf", "$rT, $rA, $rB", IIC_IntGeneral,
- [(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
+defm SUBF : XOForm_1rx<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "subf", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"subfc", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i32:$rT, (subc i32:$rB, i32:$rA))]>,
@@ -2984,10 +3056,10 @@ def RLWINM : MForm_2<21,
"rlwinm $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
[]>, RecFormRel;
let Defs = [CR0] in
-def RLWINMo : MForm_2<21,
+def RLWINM_rec : MForm_2<21,
(outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
"rlwinm. $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
- []>, isDOT, RecFormRel, PPC970_DGroup_Cracked;
+ []>, isRecordForm, RecFormRel, PPC970_DGroup_Cracked;
}
defm RLWNM : MForm_2r<23, (outs gprc:$rA),
(ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME),
@@ -3177,6 +3249,11 @@ def ADDIStocHA : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentr
// the function label.
def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
+// Pseudo-instruction marked for deletion. When deleting the instruction would
+// cause iterator invalidation in MIR transformation passes, this pseudo can be
+// used instead. It will be removed unconditionally at pre-emit time (prior to
+// branch selection).
+def UNENCODED_NOP: PPCEmitTimePseudo<(outs), (ins), "#UNENCODED_NOP", []>;
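As a hedged sketch (not code from this patch) of how such a marker pseudo is typically consumed: a pass that must not erase while walking a block can rewrite the dead instruction in place and let the pre-emit cleanup drop it. Here isNowDead is a hypothetical predicate, and TII/MBB the usual instruction-info and block handles.

    // Sketch only: degrade a dead instruction to the marker pseudo instead of
    // erasing it mid-iteration, which could invalidate the walk's iterators.
    for (MachineInstr &MI : MBB) {
      if (!isNowDead(MI))                       // hypothetical predicate
        continue;
      while (MI.getNumOperands())
        MI.RemoveOperand(0);                    // UNENCODED_NOP takes no operands
      MI.setDesc(TII->get(PPC::UNENCODED_NOP)); // dropped before branch selection
    }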
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
@@ -3219,10 +3296,10 @@ def : Pat<(f64 (fpextend f32:$src)),
// source: http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
 // The rule for seq_cst is duplicated to work with both the 64-bit and 32-bit
 // versions of Power.
-def : Pat<(atomic_fence (i64 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
-def : Pat<(atomic_fence (i32 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
-def : Pat<(atomic_fence (imm), (imm)), (SYNC 1)>, Requires<[HasSYNC]>;
-def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
+def : Pat<(atomic_fence (i64 7), (timm)), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (i32 7), (timm)), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (timm), (timm)), (SYNC 1)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (timm), (timm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
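A hedged C++ illustration of what these fence patterns select (ordering value 7 is SequentiallyConsistent in the selection DAG; the exact instruction depends on the subtarget predicates above):

    #include <atomic>

    void publish(int &slot, int v) {
      slot = v;
      // seq_cst fence: matched by the (atomic_fence 7, ...) patterns -> sync 0
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }

    void weaker_fence() {
      // any weaker fence falls through to sync 1 (lwsync), or msync on
      // subtargets that only provide msync
      std::atomic_thread_fence(std::memory_order_release);
    }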
let Predicates = [HasFPU] in {
// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
@@ -4010,24 +4087,24 @@ def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGT)),
def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)),
(SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-def ANDIo_1_EQ_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in),
- "#ANDIo_1_EQ_BIT",
+def ANDI_rec_1_EQ_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in),
+ "#ANDI_rec_1_EQ_BIT",
[(set i1:$dst, (trunc (not i32:$in)))]>;
-def ANDIo_1_GT_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in),
- "#ANDIo_1_GT_BIT",
+def ANDI_rec_1_GT_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in),
+ "#ANDI_rec_1_GT_BIT",
[(set i1:$dst, (trunc i32:$in))]>;
-def ANDIo_1_EQ_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in),
- "#ANDIo_1_EQ_BIT8",
+def ANDI_rec_1_EQ_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+ "#ANDI_rec_1_EQ_BIT8",
[(set i1:$dst, (trunc (not i64:$in)))]>;
-def ANDIo_1_GT_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in),
- "#ANDIo_1_GT_BIT8",
+def ANDI_rec_1_GT_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+ "#ANDI_rec_1_GT_BIT8",
[(set i1:$dst, (trunc i64:$in))]>;
def : Pat<(i1 (not (trunc i32:$in))),
- (ANDIo_1_EQ_BIT $in)>;
+ (ANDI_rec_1_EQ_BIT $in)>;
def : Pat<(i1 (not (trunc i64:$in))),
- (ANDIo_1_EQ_BIT8 $in)>;
+ (ANDI_rec_1_EQ_BIT8 $in)>;
//===----------------------------------------------------------------------===//
// PowerPC Instructions used for assembler/disassembler only
@@ -4111,22 +4188,22 @@ def MCRFS : XLForm_3<63, 64, (outs crrc:$BF), (ins crrc:$BFA),
def MTFSFI : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W),
"mtfsfi $BF, $U, $W", IIC_IntMFFS>;
-def MTFSFIo : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W),
- "mtfsfi. $BF, $U, $W", IIC_IntMFFS>, isDOT;
+def MTFSFI_rec : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W),
+ "mtfsfi. $BF, $U, $W", IIC_IntMFFS>, isRecordForm;
def : InstAlias<"mtfsfi $BF, $U", (MTFSFI crrc:$BF, i32imm:$U, 0)>;
-def : InstAlias<"mtfsfi. $BF, $U", (MTFSFIo crrc:$BF, i32imm:$U, 0)>;
+def : InstAlias<"mtfsfi. $BF, $U", (MTFSFI_rec crrc:$BF, i32imm:$U, 0)>;
let Predicates = [HasFPU] in {
def MTFSF : XFLForm_1<63, 711, (outs),
(ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W),
"mtfsf $FLM, $FRB, $L, $W", IIC_IntMFFS, []>;
-def MTFSFo : XFLForm_1<63, 711, (outs),
+def MTFSF_rec : XFLForm_1<63, 711, (outs),
(ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W),
- "mtfsf. $FLM, $FRB, $L, $W", IIC_IntMFFS, []>, isDOT;
+ "mtfsf. $FLM, $FRB, $L, $W", IIC_IntMFFS, []>, isRecordForm;
def : InstAlias<"mtfsf $FLM, $FRB", (MTFSF i32imm:$FLM, f8rc:$FRB, 0, 0)>;
-def : InstAlias<"mtfsf. $FLM, $FRB", (MTFSFo i32imm:$FLM, f8rc:$FRB, 0, 0)>;
+def : InstAlias<"mtfsf. $FLM, $FRB", (MTFSF_rec i32imm:$FLM, f8rc:$FRB, 0, 0)>;
}
def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB),
@@ -4144,8 +4221,8 @@ def SLBMFEV : XLForm_1_gen<31, 851, (outs gprc:$RT), (ins gprc:$RB),
def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>;
let Defs = [CR0] in
-def SLBFEEo : XForm_26<31, 979, (outs gprc:$RT), (ins gprc:$RB),
- "slbfee. $RT, $RB", IIC_SprSLBFEE, []>, isDOT;
+def SLBFEE_rec : XForm_26<31, 979, (outs gprc:$RT), (ins gprc:$RB),
+ "slbfee. $RT, $RB", IIC_SprSLBFEE, []>, isRecordForm;
def TLBIA : XForm_0<31, 370, (outs), (ins),
"tlbia", IIC_SprTLBIA, []>;
@@ -4188,7 +4265,7 @@ def TLBSX2 : XForm_base_r3xo<31, 914, (outs), (ins gprc:$RST, gprc:$A, gprc:$B),
def TLBSX2D : XForm_base_r3xo<31, 914, (outs),
(ins gprc:$RST, gprc:$A, gprc:$B),
"tlbsx. $RST, $A, $B", IIC_LdStLoad, []>,
- Requires<[IsPPC4xx]>, isDOT;
+ Requires<[IsPPC4xx]>, isRecordForm;
def RFID : XForm_0<19, 18, (outs), (ins), "rfid", IIC_IntRFID, []>;
@@ -4406,10 +4483,10 @@ def : InstAlias<"mttbhi $Rx", (MTSPR 988, gprc:$Rx)>, Requires<[IsPPC4xx]>;
def : InstAlias<"xnop", (XORI R0, R0, 0)>;
def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
-def : InstAlias<"mr. $rA, $rB", (OR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"mr. $rA, $rB", (OR8_rec g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
def : InstAlias<"not $rA, $rB", (NOR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
-def : InstAlias<"not. $rA, $rB", (NOR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"not. $rA, $rB", (NOR8_rec g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>;
@@ -4475,13 +4552,13 @@ def SUBIS : PPCAsmPseudo<"subis $rA, $rB, $imm",
(ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
def SUBIC : PPCAsmPseudo<"subic $rA, $rB, $imm",
(ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
-def SUBICo : PPCAsmPseudo<"subic. $rA, $rB, $imm",
+def SUBIC_rec : PPCAsmPseudo<"subic. $rA, $rB, $imm",
(ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
def : InstAlias<"sub $rA, $rB, $rC", (SUBF8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
-def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8_rec g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
-def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8_rec g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>;
def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
@@ -4534,109 +4611,109 @@ def : InstAlias<"tlbwelo $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 1)>,
def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
-def EXTLWIo : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b",
+def EXTLWI_rec : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def EXTRWI : PPCAsmPseudo<"extrwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
-def EXTRWIo : PPCAsmPseudo<"extrwi. $rA, $rS, $n, $b",
+def EXTRWI_rec : PPCAsmPseudo<"extrwi. $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def INSLWI : PPCAsmPseudo<"inslwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
-def INSLWIo : PPCAsmPseudo<"inslwi. $rA, $rS, $n, $b",
+def INSLWI_rec : PPCAsmPseudo<"inslwi. $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def INSRWI : PPCAsmPseudo<"insrwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
-def INSRWIo : PPCAsmPseudo<"insrwi. $rA, $rS, $n, $b",
+def INSRWI_rec : PPCAsmPseudo<"insrwi. $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def ROTRWI : PPCAsmPseudo<"rotrwi $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
-def ROTRWIo : PPCAsmPseudo<"rotrwi. $rA, $rS, $n",
+def ROTRWI_rec : PPCAsmPseudo<"rotrwi. $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
-def SLWIo : PPCAsmPseudo<"slwi. $rA, $rS, $n",
+def SLWI_rec : PPCAsmPseudo<"slwi. $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def SRWI : PPCAsmPseudo<"srwi $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
-def SRWIo : PPCAsmPseudo<"srwi. $rA, $rS, $n",
+def SRWI_rec : PPCAsmPseudo<"srwi. $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def CLRRWI : PPCAsmPseudo<"clrrwi $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
-def CLRRWIo : PPCAsmPseudo<"clrrwi. $rA, $rS, $n",
+def CLRRWI_rec : PPCAsmPseudo<"clrrwi. $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def CLRLSLWI : PPCAsmPseudo<"clrlslwi $rA, $rS, $b, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>;
-def CLRLSLWIo : PPCAsmPseudo<"clrlslwi. $rA, $rS, $b, $n",
+def CLRLSLWI_rec : PPCAsmPseudo<"clrlslwi. $rA, $rS, $b, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>;
def : InstAlias<"rotlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
-def : InstAlias<"rotlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
+def : InstAlias<"rotlwi. $rA, $rS, $n", (RLWINM_rec gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
def : InstAlias<"rotlw $rA, $rS, $rB", (RLWNM gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>;
-def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNMo gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>;
+def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNM_rec gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>;
def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
-def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
+def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINM_rec gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
def : InstAlias<"cntlzw $rA, $rS", (CNTLZW gprc:$rA, gprc:$rS)>;
-def : InstAlias<"cntlzw. $rA, $rS", (CNTLZWo gprc:$rA, gprc:$rS)>;
+def : InstAlias<"cntlzw. $rA, $rS", (CNTLZW_rec gprc:$rA, gprc:$rS)>;
// The POWER variant
def : MnemonicAlias<"cntlz", "cntlzw">;
def : MnemonicAlias<"cntlz.", "cntlzw.">;
def EXTLDI : PPCAsmPseudo<"extldi $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
-def EXTLDIo : PPCAsmPseudo<"extldi. $rA, $rS, $n, $b",
+def EXTLDI_rec : PPCAsmPseudo<"extldi. $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
def EXTRDI : PPCAsmPseudo<"extrdi $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
-def EXTRDIo : PPCAsmPseudo<"extrdi. $rA, $rS, $n, $b",
+def EXTRDI_rec : PPCAsmPseudo<"extrdi. $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
def INSRDI : PPCAsmPseudo<"insrdi $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
-def INSRDIo : PPCAsmPseudo<"insrdi. $rA, $rS, $n, $b",
+def INSRDI_rec : PPCAsmPseudo<"insrdi. $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
def ROTRDI : PPCAsmPseudo<"rotrdi $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
-def ROTRDIo : PPCAsmPseudo<"rotrdi. $rA, $rS, $n",
+def ROTRDI_rec : PPCAsmPseudo<"rotrdi. $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def SLDI : PPCAsmPseudo<"sldi $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
-def SLDIo : PPCAsmPseudo<"sldi. $rA, $rS, $n",
+def SLDI_rec : PPCAsmPseudo<"sldi. $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def SRDI : PPCAsmPseudo<"srdi $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
-def SRDIo : PPCAsmPseudo<"srdi. $rA, $rS, $n",
+def SRDI_rec : PPCAsmPseudo<"srdi. $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def CLRRDI : PPCAsmPseudo<"clrrdi $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
-def CLRRDIo : PPCAsmPseudo<"clrrdi. $rA, $rS, $n",
+def CLRRDI_rec : PPCAsmPseudo<"clrrdi. $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def CLRLSLDI : PPCAsmPseudo<"clrlsldi $rA, $rS, $b, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>;
-def CLRLSLDIo : PPCAsmPseudo<"clrlsldi. $rA, $rS, $b, $n",
+def CLRLSLDI_rec : PPCAsmPseudo<"clrlsldi. $rA, $rS, $b, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>;
def SUBPCIS : PPCAsmPseudo<"subpcis $RT, $D", (ins g8rc:$RT, s16imm:$D)>;
def : InstAlias<"rotldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
-def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
+def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICL_rec g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
def : InstAlias<"rotld $rA, $rS, $rB", (RLDCL g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
-def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
+def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCL_rec g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
def : InstAlias<"clrldi $rA, $rS, $n",
(RLDICL_32_64 g8rc:$rA, gprc:$rS, 0, u6imm:$n)>;
-def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
+def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICL_rec g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
def : InstAlias<"lnia $RT", (ADDPCIS g8rc:$RT, 0)>;
def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
-def RLWINMobm : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b",
+def RLWINMbm_rec : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
def RLWIMIbm : PPCAsmPseudo<"rlwimi $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
-def RLWIMIobm : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b",
+def RLWIMIbm_rec : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
def RLWNMbm : PPCAsmPseudo<"rlwnm $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
-def RLWNMobm : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b",
+def RLWNMbm_rec : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
// These generic branch instruction forms are used for the assembler parser only.
@@ -4865,7 +4942,7 @@ let mayStore = 1 in
def CP_PASTE : X_L1_RA5_RB5<31, 902, "paste" , gprc, IIC_LdStPASTE, []>;
let mayStore = 1, Defs = [CR0] in
-def CP_PASTEo : X_L1_RA5_RB5<31, 902, "paste.", gprc, IIC_LdStPASTE, []>, isDOT;
+def CP_PASTE_rec : X_L1_RA5_RB5<31, 902, "paste.", gprc, IIC_LdStPASTE, []>, isRecordForm;
def CP_COPYx : PPCAsmPseudo<"copy $rA, $rB" , (ins gprc:$rA, gprc:$rB)>;
def CP_PASTEx : PPCAsmPseudo<"paste $rA, $rB", (ins gprc:$rA, gprc:$rB)>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 2aad5860d87f..be6b30ffa08b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -120,11 +120,11 @@ multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
[(set OutTy:$XT, (Int InTy:$XA, InTy:$XB))]>;
let Defs = [CR6] in
- def o : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ def _rec : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[(set InTy:$XT,
(InTy (PPCvcmp_o InTy:$XA, InTy:$XB, xo)))]>,
- isDOT;
+ isRecordForm;
}
}
@@ -152,7 +152,6 @@ def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">;
let Predicates = [HasVSX] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
let hasSideEffects = 0 in { // VSX instructions don't have side effects.
-let Uses = [RM] in {
// Load indexed instructions
let mayLoad = 1, mayStore = 0 in {
@@ -214,6 +213,7 @@ let Uses = [RM] in {
}
} // mayStore
+ let Uses = [RM] in {
// Add/Mul Instructions
let isCommutable = 1 in {
def XSADDDP : XX3Form<60, 32,
@@ -1255,6 +1255,55 @@ def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
} // AddedComplexity
} // HasVSX
+def FpMinMax {
+ dag F32Min = (COPY_TO_REGCLASS (XSMINDP (COPY_TO_REGCLASS $A, VSFRC),
+ (COPY_TO_REGCLASS $B, VSFRC)),
+ VSSRC);
+ dag F32Max = (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC),
+ (COPY_TO_REGCLASS $B, VSFRC)),
+ VSSRC);
+}
+
+let AddedComplexity = 400, Predicates = [HasVSX] in {
+ // f32 Min.
+ def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
+ (f32 FpMinMax.F32Min)>;
+ def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)),
+ (f32 FpMinMax.F32Min)>;
+ def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Min)>;
+ def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Min)>;
+ // F32 Max.
+ def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
+ (f32 FpMinMax.F32Max)>;
+ def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)),
+ (f32 FpMinMax.F32Max)>;
+ def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Max)>;
+ def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Max)>;
+
+ // f64 Min.
+ def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
+ (f64 (XSMINDP $A, $B))>;
+ def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)),
+ (f64 (XSMINDP $A, $B))>;
+ def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))),
+ (f64 (XSMINDP $A, $B))>;
+ def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
+ (f64 (XSMINDP $A, $B))>;
+ // f64 Max.
+ def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
+ (f64 (XSMAXDP $A, $B))>;
+ def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)),
+ (f64 (XSMAXDP $A, $B))>;
+ def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))),
+ (f64 (XSMAXDP $A, $B))>;
+ def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
+ (f64 (XSMAXDP $A, $B))>;
+}
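A hedged sketch of source code that can exercise these patterns; whether fmin/fmax calls reach ISel as fminnum_ieee/fmaxnum_ieee depends on the usual IEEE-semantics conditions:

    #include <cmath>

    float clamp01(float x) {
      // May select xsmaxdp then xsmindp, with the VSFRC<->VSSRC copies that
      // the FpMinMax helper dags above wrap around the f32 operands.
      return std::fmin(std::fmax(x, 0.0f), 1.0f);
    }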
+
def ScalarLoads {
dag Li8 = (i32 (extloadi8 xoaddr:$src));
dag ZELi8 = (i32 (zextloadi8 xoaddr:$src));
@@ -1330,7 +1379,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
[(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
// VSX scalar loads introduced in ISA 2.07
- let mayLoad = 1, mayStore = 0 in {
+ let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
let CodeSize = 3 in
def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins memrr:$src),
"lxsspx $XT, $src", IIC_LdStLFD, []>;
@@ -1355,7 +1404,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
} // mayLoad
// VSX scalar stores introduced in ISA 2.07
- let mayStore = 1, mayLoad = 0 in {
+ let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
let CodeSize = 3 in
def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
"stxsspx $XT, $dst", IIC_LdStSTFD, []>;
@@ -1912,7 +1961,7 @@ def VectorExtractions {
- The order of elements after the move to GPR is reversed, so we invert
the bits of the index prior to truncating to the range 0-7
*/
- dag BE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDIo8 $Idx, 8)));
+ dag BE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDI8_rec $Idx, 8)));
dag BE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, BE_VBYTE_PERM_VEC));
dag BE_MV_VBYTE = (MFVSRD
(EXTRACT_SUBREG
@@ -1931,7 +1980,7 @@ def VectorExtractions {
the bits of the index prior to truncating to the range 0-3
*/
dag BE_VHALF_PERM_VEC = (v16i8 (LVSL ZERO8,
- (RLDICR (ANDIo8 $Idx, 4), 1, 62)));
+ (RLDICR (ANDI8_rec $Idx, 4), 1, 62)));
dag BE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, BE_VHALF_PERM_VEC));
dag BE_MV_VHALF = (MFVSRD
(EXTRACT_SUBREG
@@ -1949,7 +1998,7 @@ def VectorExtractions {
the bits of the index prior to truncating to the range 0-1
*/
dag BE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
- (RLDICR (ANDIo8 $Idx, 2), 2, 61)));
+ (RLDICR (ANDI8_rec $Idx, 2), 2, 61)));
dag BE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VWORD_PERM_VEC));
dag BE_MV_VWORD = (MFVSRD
(EXTRACT_SUBREG
@@ -1965,7 +2014,7 @@ def VectorExtractions {
element indices.
*/
dag BE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
- (RLDICR (ANDIo8 $Idx, 1), 3, 60)));
+ (RLDICR (ANDI8_rec $Idx, 1), 3, 60)));
dag BE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VDWORD_PERM_VEC));
dag BE_VARIABLE_DWORD =
(MFVSRD (EXTRACT_SUBREG
@@ -2477,6 +2526,43 @@ def : Pat<(i64 (bitconvert f64:$S)),
// (move to FPR, nothing else needed)
def : Pat<(f64 (bitconvert i64:$S)),
(f64 (MTVSRD $S))>;
+
+// Rounding to integer.
+def : Pat<(i64 (lrint f64:$S)),
+ (i64 (MFVSRD (FCTID $S)))>;
+def : Pat<(i64 (lrint f32:$S)),
+ (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
+def : Pat<(i64 (llrint f64:$S)),
+ (i64 (MFVSRD (FCTID $S)))>;
+def : Pat<(i64 (llrint f32:$S)),
+ (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
+def : Pat<(i64 (lround f64:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
+def : Pat<(i64 (lround f32:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
+def : Pat<(i64 (llround f64:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
+def : Pat<(i64 (llround f32:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
+}
+
+let Predicates = [HasVSX] in {
+// Rounding for single precision.
+def : Pat<(f32 (fround f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPI
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (fnearbyint f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIC
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (ffloor f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIM
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (fceil f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIP
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (ftrunc f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIZ
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
}
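A hedged illustration of the libm calls that these patterns let ISel lower to single VSX round instructions (plus the convert/move sequence for lround, per the patterns above):

    #include <cmath>

    float trunc_f(float x)  { return std::trunc(x);  } // xsrdpiz
    float ceil_f(float x)   { return std::ceil(x);   } // xsrdpip
    long  round_l(double x) { return std::lround(x); } // xsrdpi + fctid + mfvsrd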
// Materialize a zero-vector of long long
@@ -2502,7 +2588,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
list<dag> pattern>
- : X_VT5_XO5_VB5<opcode, xo2, xo, opc, pattern>, isDOT;
+ : X_VT5_XO5_VB5<opcode, xo2, xo, opc, pattern>, isRecordForm;
// [PO VRT XO VRB XO /], but the VRB is only used the left 64 bits (or less),
// So we use different operand class for VRB
@@ -2520,7 +2606,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
class X_VT5_XO5_VB5_VSFR_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
list<dag> pattern>
- : X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isDOT;
+ : X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isRecordForm;
// [PO T XO B XO BX /]
class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
@@ -2550,7 +2636,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
class X_VT5_VA5_VB5_Ro<bits<6> opcode, bits<10> xo, string opc,
list<dag> pattern>
- : X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isDOT;
+ : X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isRecordForm;
// [PO VRT VRA VRB XO /]
class X_VT5_VA5_VB5_FMA<bits<6> opcode, bits<10> xo, string opc,
@@ -2562,7 +2648,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
class X_VT5_VA5_VB5_FMA_Ro<bits<6> opcode, bits<10> xo, string opc,
list<dag> pattern>
- : X_VT5_VA5_VB5_FMA<opcode, xo, opc, pattern>, isDOT;
+ : X_VT5_VA5_VB5_FMA<opcode, xo, opc, pattern>, isRecordForm;
//===--------------------------------------------------------------------===//
// Quad-Precision Scalar Move Instructions:
@@ -2884,13 +2970,14 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
//===--------------------------------------------------------------------===//
// Maximum/Minimum Type-C/Type-J DP
- // XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU, so we use vsrc for XT
- def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsrc, vsfrc, vsfrc,
- IIC_VecFP, []>;
+ def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsfrc, vsfrc, vsfrc,
+ IIC_VecFP,
+ [(set f64:$XT, (PPCxsmaxc f64:$XA, f64:$XB))]>;
def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc,
IIC_VecFP, []>;
- def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsrc, vsfrc, vsfrc,
- IIC_VecFP, []>;
+ def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsfrc, vsfrc, vsfrc,
+ IIC_VecFP,
+ [(set f64:$XT, (PPCxsminc f64:$XA, f64:$XB))]>;
def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc,
IIC_VecFP, []>;
@@ -2898,18 +2985,16 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Vector Byte-Reverse H/W/D/Q Word
def XXBRH : XX2_XT6_XO5_XB6<60, 7, 475, "xxbrh", vsrc, []>;
- def XXBRW : XX2_XT6_XO5_XB6<60, 15, 475, "xxbrw", vsrc, []>;
- def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc, []>;
+ def XXBRW : XX2_XT6_XO5_XB6<60, 15, 475, "xxbrw", vsrc,
+ [(set v4i32:$XT, (bswap v4i32:$XB))]>;
+ def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc,
+ [(set v2i64:$XT, (bswap v2i64:$XB))]>;
def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>;
// Vector Reverse
- def : Pat<(v8i16 (PPCxxreverse v8i16 :$A)),
+ def : Pat<(v8i16 (bswap v8i16 :$A)),
(v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
- def : Pat<(v4i32 (PPCxxreverse v4i32 :$A)),
- (v4i32 (XXBRW $A))>;
- def : Pat<(v2i64 (PPCxxreverse v2i64 :$A)),
- (v2i64 (XXBRD $A))>;
- def : Pat<(v1i128 (PPCxxreverse v1i128 :$A)),
+ def : Pat<(v1i128 (bswap v1i128 :$A)),
(v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
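A hedged sketch of code whose vectorized form yields the v4i32 bswap node the new xxbrw pattern matches; vector_size is a GCC/Clang extension, and the single-instruction outcome is the expected, not guaranteed, selection:

    typedef unsigned int v4u32 __attribute__((vector_size(16)));

    v4u32 bswap_each(v4u32 v) {
      v4u32 r = v;
      for (int i = 0; i < 4; ++i)
        r[i] = __builtin_bswap32(v[i]); // as a v4i32 bswap: one xxbrw
      return r;
    }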
// Vector Permute
@@ -2927,7 +3012,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
- let mayLoad = 1, mayStore = 0 in {
+ let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
// Load Vector
def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
"lxv $XT, $src", IIC_LdStLFD, []>;
@@ -2972,7 +3057,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
- let mayStore = 1, mayLoad = 0 in {
+ let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
// Store Vector
def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
"stxv $XT, $dst", IIC_LdStSTFD, []>;
@@ -3697,6 +3782,15 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(f128 (fpextend f32:$src)),
(f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>;
+ def : Pat<(f32 (PPCxsmaxc f32:$XA, f32:$XB)),
+ (f32 (COPY_TO_REGCLASS (XSMAXCDP (COPY_TO_REGCLASS $XA, VSSRC),
+ (COPY_TO_REGCLASS $XB, VSSRC)),
+ VSSRC))>;
+ def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)),
+ (f32 (COPY_TO_REGCLASS (XSMINCDP (COPY_TO_REGCLASS $XA, VSSRC),
+ (COPY_TO_REGCLASS $XB, VSSRC)),
+ VSSRC))>;
+
} // end HasP9Vector, AddedComplexity
let AddedComplexity = 400 in {
@@ -3710,7 +3804,7 @@ let AddedComplexity = 400 in {
}
}
-let Predicates = [HasP9Vector] in {
+let Predicates = [HasP9Vector], hasSideEffects = 0 in {
let mayStore = 1 in {
def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
(ins spilltovsrrc:$XT, memrr:$dst),
@@ -3865,8 +3959,20 @@ def DblToULongLoad {
dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (load xoaddr:$A)))));
}
+// FP load dags (for f32 -> v4f32)
+def LoadFP {
+ dag A = (f32 (load xoaddr:$A));
+ dag B = (f32 (load xoaddr:$B));
+ dag C = (f32 (load xoaddr:$C));
+ dag D = (f32 (load xoaddr:$D));
+}
+
// FP merge dags (for f32 -> v4f32)
def MrgFP {
+ dag LD32A = (COPY_TO_REGCLASS (LIWZX xoaddr:$A), VSRC);
+ dag LD32B = (COPY_TO_REGCLASS (LIWZX xoaddr:$B), VSRC);
+ dag LD32C = (COPY_TO_REGCLASS (LIWZX xoaddr:$C), VSRC);
+ dag LD32D = (COPY_TO_REGCLASS (LIWZX xoaddr:$D), VSRC);
dag AC = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $A, VSRC),
(COPY_TO_REGCLASS $C, VSRC), 0));
dag BD = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $B, VSRC),
@@ -4022,7 +4128,18 @@ let AddedComplexity = 400 in {
(v2f64 (XXPERMDI
(COPY_TO_REGCLASS $A, VSRC),
(COPY_TO_REGCLASS $B, VSRC), 0))>;
-
+ // Using VMRGEW to assemble the final vector would be a lower latency
+ // solution. However, we choose to go with the slightly higher latency
+ // XXPERMDI for 2 reasons:
+  // 1. This is likely to occur in unrolled loops where register pressure is
+  //    high, so we want to use the latter as it has access to all 64 VSX
+  //    registers.
+ // 2. Using Altivec instructions in this sequence would likely cause the
+ // allocation of Altivec registers even for the loads which in turn would
+ // force the use of LXSIWZX for the loads, adding a cycle of latency to
+ // each of the loads which would otherwise be able to use LFIWZX.
+ def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
+ (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32A, MrgFP.LD32B),
+ (XXMRGHW MrgFP.LD32C, MrgFP.LD32D), 3))>;
def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)),
(VMRGEW MrgFP.AC, MrgFP.BD)>;
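A hedged example of the build_vector-of-loads shape the new pattern above targets: the four f32 loads can then use lfiwzx (the LIWZX pseudo) in the full 64-register VSX file and be merged with xxmrghw/xxpermdi, rather than being forced into Altivec registers and lxsiwzx:

    typedef float v4sf __attribute__((vector_size(16)));

    v4sf gather4(const float *a, const float *b, const float *c, const float *d) {
      v4sf r = {*a, *b, *c, *d}; // four independent f32 loads -> build_vector
      return r;
    }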
def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
@@ -4089,7 +4206,18 @@ let AddedComplexity = 400 in {
(v2f64 (XXPERMDI
(COPY_TO_REGCLASS $B, VSRC),
(COPY_TO_REGCLASS $A, VSRC), 0))>;
-
+ // Using VMRGEW to assemble the final vector would be a lower latency
+ // solution. However, we choose to go with the slightly higher latency
+ // XXPERMDI for 2 reasons:
+  // 1. This is likely to occur in unrolled loops where register pressure is
+  //    high, so we want to use the latter as it has access to all 64 VSX
+  //    registers.
+ // 2. Using Altivec instructions in this sequence would likely cause the
+ // allocation of Altivec registers even for the loads which in turn would
+ // force the use of LXSIWZX for the loads, adding a cycle of latency to
+ // each of the loads which would otherwise be able to use LFIWZX.
+ def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
+ (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32D, MrgFP.LD32C),
+ (XXMRGHW MrgFP.LD32B, MrgFP.LD32A), 3))>;
def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)),
(VMRGEW MrgFP.AC, MrgFP.BD)>;
def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
diff --git a/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index d252cfbd26b1..b761f337533b 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -1,4 +1,4 @@
-//===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass -----------===//
+//===------ PPCLoopInstrFormPrep.cpp - Loop Instr Form Prep Pass ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,19 +6,42 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements a pass to prepare loops for pre-increment addressing
-// modes. Additional PHIs are created for loop induction variables used by
-// load/store instructions so that the pre-increment forms can be used.
-// Generically, this means transforming loops like this:
-// for (int i = 0; i < n; ++i)
-// array[i] = c;
-// to look like this:
-// T *p = array[-1];
-// for (int i = 0; i < n; ++i)
-// *++p = c;
+// This file implements a pass to prepare loops for PPC-preferred addressing
+// modes by leveraging different instruction forms (e.g. DS/DQ form, D/DS form
+// with update).
+// Additional PHIs are created for loop induction variables used by load/store
+// instructions so that the preferred addressing modes can be used.
+//
+// 1: DS/DQ form preparation: prepare the load/store instructions so that they
+// can satisfy the DS/DQ form displacement requirements.
+// Generically, this means transforming loops like this:
+// for (int i = 0; i < n; ++i) {
+// unsigned long x1 = *(unsigned long *)(p + i + 5);
+// unsigned long x2 = *(unsigned long *)(p + i + 9);
+// }
+//
+// to look like this:
+//
+//   char *NewP = p + 5;
+// for (int i = 0; i < n; ++i) {
+// unsigned long x1 = *(unsigned long *)(i + NewP);
+// unsigned long x2 = *(unsigned long *)(i + NewP + 4);
+// }
+//
+// 2: D/DS form with update preparation: prepare the load/store instructions so
+//    that the update form can be used for pre-increment.
+// Generically, this means transforming loops like this:
+// for (int i = 0; i < n; ++i)
+// array[i] = c;
+//
+// to look like this:
+//
+// T *p = array[-1];
+// for (int i = 0; i < n; ++i)
+// *++p = c;
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ppc-loop-preinc-prep"
+#define DEBUG_TYPE "ppc-loop-instr-form-prep"
#include "PPC.h"
#include "PPCSubtarget.h"
@@ -32,7 +55,6 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
@@ -42,6 +64,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -49,6 +72,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <cassert>
#include <iterator>
@@ -56,13 +80,49 @@
using namespace llvm;
-// By default, we limit this to creating 16 PHIs (which is a little over half
-// of the allocatable register set).
-static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars",
+// By default, we limit this to hoisting 16 common bases out of loops per
+// function. 16 is a little over half of the allocatable register set.
+static cl::opt<unsigned> MaxVarsPrep("ppc-formprep-max-vars",
cl::Hidden, cl::init(16),
- cl::desc("Potential PHI threshold for PPC preinc loop prep"));
-
-STATISTIC(PHINodeAlreadyExists, "PHI node already in pre-increment form");
+ cl::desc("Potential common base number threshold per function for PPC loop "
+ "prep"));
+
+static cl::opt<bool> PreferUpdateForm("ppc-formprep-prefer-update",
+ cl::init(true), cl::Hidden,
+  cl::desc("prefer the update form when the DS form is also an update form"));
+
+// The sum of the following 3 per-loop thresholds for all loops cannot be
+// larger than MaxVarsPrep.
+// By default, we limit this to creating 9 PHIs for one loop.
+// 9 in total, and 3 for each kind of preparation, are experimental values
+// tuned on Power9.
+static cl::opt<unsigned> MaxVarsUpdateForm("ppc-preinc-prep-max-vars",
+ cl::Hidden, cl::init(3),
+ cl::desc("Potential PHI threshold per loop for PPC loop prep of update "
+ "form"));
+
+static cl::opt<unsigned> MaxVarsDSForm("ppc-dsprep-max-vars",
+ cl::Hidden, cl::init(3),
+ cl::desc("Potential PHI threshold per loop for PPC loop prep of DS form"));
+
+static cl::opt<unsigned> MaxVarsDQForm("ppc-dqprep-max-vars",
+ cl::Hidden, cl::init(3),
+ cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form"));
+
+
+// It would not be profitable if the common base has only one load/store:
+// ISEL should already be able to choose the best load/store form based on
+// the offset for a single load/store. Set the minimal profitable value's
+// default to 2 and make it an option.
+static cl::opt<unsigned> DispFormPrepMinThreshold("ppc-dispprep-min-threshold",
+ cl::Hidden, cl::init(2),
+ cl::desc("Minimal common base load/store instructions triggering DS/DQ form "
+ "preparation"));
+
+STATISTIC(PHINodeAlreadyExistsUpdate, "PHI node already in pre-increment form");
+STATISTIC(PHINodeAlreadyExistsDS, "PHI node already in DS form");
+STATISTIC(PHINodeAlreadyExistsDQ, "PHI node already in DQ form");
+STATISTIC(DSFormChainRewritten, "Num of DS form chain rewritten");
+STATISTIC(DQFormChainRewritten, "Num of DQ form chain rewritten");
STATISTIC(UpdFormChainRewritten, "Num of update form chain rewritten");
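Since these are ordinary (hidden) cl::opt flags, they can be passed to llc, or through -mllvm to clang, for experimentation; for example, -ppc-dsprep-max-vars=4 raises the per-loop DS-form PHI budget and -ppc-formprep-prefer-update=false disables the update-form preference (illustrative invocations, not taken from this patch).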
namespace {
@@ -82,16 +142,22 @@ namespace {
SmallVector<BucketElement, 16> Elements;
};
- class PPCLoopPreIncPrep : public FunctionPass {
+  // "UpdateForm" is not a real PPC instruction form; it stands for a D-form
+  // load/store with update (like ldu/stdu) or the Prefetch intrinsic.
+  // For DS form instructions, the displacement must be a multiple of 4.
+  // For DQ form instructions, the displacement must be a multiple of 16.
+ enum InstrForm { UpdateForm = 1, DSForm = 4, DQForm = 16 };
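A hedged sketch of why the enum values are 1/4/16 rather than 0/1/2: each value doubles as the displacement alignment divisor, so an offset can be classified with a single urem against the Form itself, as the collection code below does.

    // Sketch only; the pass performs the equivalent check on SCEV constants.
    static bool fitsDispForm(int64_t Offset, InstrForm Form) {
      return (Offset % (int64_t)Form) == 0; // UpdateForm (1) always fits
    }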
+
+ class PPCLoopInstrFormPrep : public FunctionPass {
public:
static char ID; // Pass ID, replacement for typeid
- PPCLoopPreIncPrep() : FunctionPass(ID) {
- initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
+ PPCLoopInstrFormPrep() : FunctionPass(ID) {
+ initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry());
}
- PPCLoopPreIncPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
- initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
+ PPCLoopInstrFormPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
+ initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -111,12 +177,19 @@ namespace {
ScalarEvolution *SE;
bool PreserveLCSSA;
+    /// Number of successful preparations for Update/DS/DQ form in all
+    /// innermost loops. Each successful preparation hoists one common base
+    /// out of the loop which, like LICM, may increase register pressure.
+    /// Make sure the total preparation count can be controlled by an option.
+ unsigned SuccPrepCount;
+
bool runOnLoop(Loop *L);
    /// Check if the required PHI node already exists in Loop \p L.
bool alreadyPrepared(Loop *L, Instruction* MemI,
const SCEV *BasePtrStartSCEV,
- const SCEVConstant *BasePtrIncSCEV);
+ const SCEVConstant *BasePtrIncSCEV,
+ InstrForm Form);
    /// Collect candidates in Loop \p L that satisfy the given condition
    /// (\p isValidCandidate() returns true).
@@ -134,32 +207,50 @@ namespace {
/// Prepare all candidates in \p Buckets for update form.
bool updateFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets);
+    /// Prepare all candidates in \p Buckets for displacement form
+    /// (currently DS/DQ form).
+ bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets,
+ InstrForm Form);
+
/// Prepare for one chain \p BucketChain, find the best base element and
/// update all other elements in \p BucketChain accordingly.
+ /// \p Form is used to find the best base element.
+    /// On success, the best base element is stored as the first element of
+    /// \p BucketChain.
+    /// Returns false if no base element is found, otherwise returns true.
+ bool prepareBaseForDispFormChain(Bucket &BucketChain,
+ InstrForm Form);
+
+ /// Prepare for one chain \p BucketChain, find the best base element and
+ /// update all other elements in \p BucketChain accordingly.
+    /// On success, the best base element is stored as the first element of
+    /// \p BucketChain.
+    /// Returns false if no base element is found, otherwise returns true.
bool prepareBaseForUpdateFormChain(Bucket &BucketChain);
/// Rewrite load/store instructions in \p BucketChain according to
/// preparation.
bool rewriteLoadStores(Loop *L, Bucket &BucketChain,
- SmallSet<BasicBlock *, 16> &BBChanged);
+ SmallSet<BasicBlock *, 16> &BBChanged,
+ InstrForm Form);
};
} // end anonymous namespace
-char PPCLoopPreIncPrep::ID = 0;
-static const char *name = "Prepare loop for pre-inc. addressing modes";
-INITIALIZE_PASS_BEGIN(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
+char PPCLoopInstrFormPrep::ID = 0;
+static const char *name = "Prepare loop for ppc preferred instruction forms";
+INITIALIZE_PASS_BEGIN(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
+INITIALIZE_PASS_END(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false)
static const std::string PHINodeNameSuffix = ".phi";
static const std::string CastNodeNameSuffix = ".cast";
static const std::string GEPNodeIncNameSuffix = ".inc";
static const std::string GEPNodeOffNameSuffix = ".off";
-FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) {
- return new PPCLoopPreIncPrep(TM);
+FunctionPass *llvm::createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM) {
+ return new PPCLoopInstrFormPrep(TM);
}
static bool IsPtrInBounds(Value *BasePtr) {
@@ -193,7 +284,7 @@ static Value *GetPointerOperand(Value *MemI) {
return nullptr;
}
-bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
+bool PPCLoopInstrFormPrep::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
@@ -203,6 +294,7 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
DT = DTWP ? &DTWP->getDomTree() : nullptr;
PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
ST = TM ? TM->getSubtargetImpl(F) : nullptr;
+ SuccPrepCount = 0;
bool MadeChange = false;
@@ -213,7 +305,7 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
return MadeChange;
}
-void PPCLoopPreIncPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
+void PPCLoopInstrFormPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
SmallVector<Bucket, 16> &Buckets,
unsigned MaxCandidateNum) {
assert((MemI && GetPointerOperand(MemI)) &&
@@ -236,7 +328,7 @@ void PPCLoopPreIncPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
}
}
-SmallVector<Bucket, 16> PPCLoopPreIncPrep::collectCandidates(
+SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(
Loop *L,
std::function<bool(const Instruction *, const Value *)> isValidCandidate,
unsigned MaxCandidateNum) {
@@ -277,13 +369,82 @@ SmallVector<Bucket, 16> PPCLoopPreIncPrep::collectCandidates(
return Buckets;
}
-// TODO: implement a more clever base choosing policy.
+bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
+ InstrForm Form) {
+ // RemainderOffsetInfo details:
+  // key: value of (Offset urem DispConstraint). For DSForm, it is in
+  // [0, 4).
+  // first of pair: the index of the first BucketElement whose remainder is
+  // equal to the key. For key 0, this value must be 0.
+ // second of pair: number of load/stores with the same remainder.
+ DenseMap<unsigned, std::pair<unsigned, unsigned>> RemainderOffsetInfo;
+
+ for (unsigned j = 0, je = BucketChain.Elements.size(); j != je; ++j) {
+ if (!BucketChain.Elements[j].Offset)
+ RemainderOffsetInfo[0] = std::make_pair(0, 1);
+ else {
+ unsigned Remainder =
+ BucketChain.Elements[j].Offset->getAPInt().urem(Form);
+ if (RemainderOffsetInfo.find(Remainder) == RemainderOffsetInfo.end())
+ RemainderOffsetInfo[Remainder] = std::make_pair(j, 1);
+ else
+ RemainderOffsetInfo[Remainder].second++;
+ }
+ }
+  // Currently we choose the most profitable base as the one with the maximum
+  // number of load/stores sharing the same remainder.
+  // FIXME: adjust the base selection strategy according to load/store offset
+  // distribution.
+  // For example, if we have one candidate chain for DS form preparation that
+  // contains the following load/stores with different remainders:
+  // 1: 10 load/stores whose remainder is 1;
+  // 2: 9 load/stores whose remainder is 2;
+  // 3: 1 load/store with remainder 3 and 0 with remainder 0;
+  // Now we choose the first load/store whose remainder is 1 as the base and
+  // adjust all other load/stores according to the new base, so we get 10 DS
+  // form and 10 X form.
+  // But we could be more clever: for this case we could use two bases, one for
+  // remainder 1 and the other for remainder 2, and thus get 19 DS form and
+  // only 1 X form.
+ unsigned MaxCountRemainder = 0;
+ for (unsigned j = 0; j < (unsigned)Form; j++)
+ if ((RemainderOffsetInfo.find(j) != RemainderOffsetInfo.end()) &&
+ RemainderOffsetInfo[j].second >
+ RemainderOffsetInfo[MaxCountRemainder].second)
+ MaxCountRemainder = j;
+
+ // Abort when there are too few insts with common base.
+ if (RemainderOffsetInfo[MaxCountRemainder].second < DispFormPrepMinThreshold)
+ return false;
+
+  // If the first value is the most profitable, there is no need to adjust the
+  // BucketChain elements, as the first value was already subtracted from them
+  // when collecting.
+ if (MaxCountRemainder == 0)
+ return true;
+
+ // Adjust load/store to the new chosen base.
+ const SCEV *Offset =
+ BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first].Offset;
+ BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset);
+ for (auto &E : BucketChain.Elements) {
+ if (E.Offset)
+ E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
+ else
+ E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
+ }
+
+ std::swap(BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first],
+ BucketChain.Elements[0]);
+ return true;
+}
+
+// FIXME: implement a more clever base choosing policy.
 // Currently we always choose an existing load/store offset. This may lead to
 // suboptimal code sequences. For example, for one DS chain with offsets
 // {-32769, 2003, 2007, 2011}, we choose -32769 as the base offset; the
 // remaining displacements for the load/stores are {0, 34772, 34776, 34780}.
 // Though each offset is now a multiple of 4, it cannot be represented by an
 // sint16.
-bool PPCLoopPreIncPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
+bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
// We have a choice now of which instruction's memory operand we use as the
// base for the generated PHI. Always picking the first instruction in each
// bucket does not work well, specifically because that instruction might
@@ -323,8 +484,9 @@ bool PPCLoopPreIncPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
return true;
}
-bool PPCLoopPreIncPrep::rewriteLoadStores(
- Loop *L, Bucket &BucketChain, SmallSet<BasicBlock *, 16> &BBChanged) {
+bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
+ SmallSet<BasicBlock *, 16> &BBChanged,
+ InstrForm Form) {
bool MadeChange = false;
const SCEVAddRecExpr *BasePtrSCEV =
cast<SCEVAddRecExpr>(BucketChain.BaseSCEV);
@@ -345,19 +507,30 @@ bool PPCLoopPreIncPrep::rewriteLoadStores(
Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
BasePtr->getType()->getPointerAddressSpace());
- const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart();
- if (!SE->isLoopInvariant(BasePtrStartSCEV, L))
+ if (!SE->isLoopInvariant(BasePtrSCEV->getStart(), L))
return MadeChange;
const SCEVConstant *BasePtrIncSCEV =
dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
if (!BasePtrIncSCEV)
return MadeChange;
- BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV);
+
+  // Some DS form load/store instructions can also be used in update form when
+  // the stride is a multiple of 4. Use the update form if it is preferred.
+ bool CanPreInc = (Form == UpdateForm ||
+ ((Form == DSForm) && !BasePtrIncSCEV->getAPInt().urem(4) &&
+ PreferUpdateForm));
+ const SCEV *BasePtrStartSCEV = nullptr;
+ if (CanPreInc)
+ BasePtrStartSCEV =
+ SE->getMinusSCEV(BasePtrSCEV->getStart(), BasePtrIncSCEV);
+ else
+ BasePtrStartSCEV = BasePtrSCEV->getStart();
+
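A hedged illustration of the CanPreInc case: an 8-byte-stride chain of i64 accesses is a DS-form candidate, but because 8 is a multiple of 4 (and the update form is preferred by default) it is prepared for ldu/stdu-style pre-increment instead.

    long sum(const long *p, int n) {
      long s = 0;
      for (int i = 0; i < n; ++i)
        s += p[i]; // stride 8: DS-legal, but urem(4) == 0 -> update form
      return s;
    }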
if (!isSafeToExpand(BasePtrStartSCEV, *SE))
return MadeChange;
- if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV))
+ if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV, Form))
return MadeChange;
LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
@@ -377,31 +550,61 @@ bool PPCLoopPreIncPrep::rewriteLoadStores(
// Note that LoopPredecessor might occur in the predecessor list multiple
// times, and we need to add it the right number of times.
- for (const auto &PI : predecessors(Header)) {
+ for (auto PI : predecessors(Header)) {
if (PI != LoopPredecessor)
continue;
NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
}
- Instruction *InsPoint = &*Header->getFirstInsertionPt();
- GetElementPtrInst *PtrInc = GetElementPtrInst::Create(
- I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
- getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
- PtrInc->setIsInBounds(IsPtrInBounds(BasePtr));
- for (const auto &PI : predecessors(Header)) {
- if (PI == LoopPredecessor)
- continue;
+ Instruction *PtrInc = nullptr;
+ Instruction *NewBasePtr = nullptr;
+ if (CanPreInc) {
+ Instruction *InsPoint = &*Header->getFirstInsertionPt();
+ PtrInc = GetElementPtrInst::Create(
+ I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
+ getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
+ cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
+ for (auto PI : predecessors(Header)) {
+ if (PI == LoopPredecessor)
+ continue;
- NewPHI->addIncoming(PtrInc, PI);
- }
+ NewPHI->addIncoming(PtrInc, PI);
+ }
+ if (PtrInc->getType() != BasePtr->getType())
+ NewBasePtr = new BitCastInst(
+ PtrInc, BasePtr->getType(),
+ getInstrName(PtrInc, CastNodeNameSuffix), InsPoint);
+ else
+ NewBasePtr = PtrInc;
+ } else {
+ // Note that LoopPredecessor might occur in the predecessor list multiple
+      // times, and we need to make sure we do not add any further incoming
+      // values for it to the PHI.
+ for (auto PI : predecessors(Header)) {
+ if (PI == LoopPredecessor)
+ continue;
- Instruction *NewBasePtr;
- if (PtrInc->getType() != BasePtr->getType())
- NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(),
- getInstrName(PtrInc, CastNodeNameSuffix), InsPoint);
- else
- NewBasePtr = PtrInc;
+ // For the latch predecessor, we need to insert a GEP just before the
+      // terminator to increment the address.
+ BasicBlock *BB = PI;
+ Instruction *InsPoint = BB->getTerminator();
+ PtrInc = GetElementPtrInst::Create(
+ I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
+ getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
+
+ cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
+
+ NewPHI->addIncoming(PtrInc, PI);
+ }
+ PtrInc = NewPHI;
+ if (NewPHI->getType() != BasePtr->getType())
+ NewBasePtr =
+ new BitCastInst(NewPHI, BasePtr->getType(),
+ getInstrName(NewPHI, CastNodeNameSuffix),
+ &*Header->getFirstInsertionPt());
+ else
+ NewBasePtr = NewPHI;
+ }
if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
BBChanged.insert(IDel->getParent());
@@ -460,12 +663,20 @@ bool PPCLoopPreIncPrep::rewriteLoadStores(
}
MadeChange = true;
- UpdFormChainRewritten++;
+
+ SuccPrepCount++;
+
+ if (Form == DSForm && !CanPreInc)
+ DSFormChainRewritten++;
+ else if (Form == DQForm)
+ DQFormChainRewritten++;
+ else if (Form == UpdateForm || (Form == DSForm && CanPreInc))
+ UpdFormChainRewritten++;
return MadeChange;
}
-bool PPCLoopPreIncPrep::updateFormPrep(Loop *L,
+bool PPCLoopInstrFormPrep::updateFormPrep(Loop *L,
SmallVector<Bucket, 16> &Buckets) {
bool MadeChange = false;
if (Buckets.empty())
@@ -475,7 +686,30 @@ bool PPCLoopPreIncPrep::updateFormPrep(Loop *L,
// The base address of each bucket is transformed into a phi and the others
// are rewritten based on new base.
if (prepareBaseForUpdateFormChain(Bucket))
- MadeChange |= rewriteLoadStores(L, Bucket, BBChanged);
+ MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, UpdateForm);
+
+ if (MadeChange)
+ for (auto &BB : L->blocks())
+ if (BBChanged.count(BB))
+ DeleteDeadPHIs(BB);
+ return MadeChange;
+}
+
+bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets,
+ InstrForm Form) {
+ bool MadeChange = false;
+
+ if (Buckets.empty())
+ return MadeChange;
+
+ SmallSet<BasicBlock *, 16> BBChanged;
+ for (auto &Bucket : Buckets) {
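+    // Rewriting a chain inserts a PHI and extra pointer arithmetic, so skip
+    // buckets that are too small for the preparation to pay off.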
+ if (Bucket.Elements.size() < DispFormPrepMinThreshold)
+ continue;
+ if (prepareBaseForDispFormChain(Bucket, Form))
+ MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, Form);
+ }
+
if (MadeChange)
for (auto &BB : L->blocks())
if (BBChanged.count(BB))
@@ -483,13 +717,14 @@ bool PPCLoopPreIncPrep::updateFormPrep(Loop *L,
return MadeChange;
}
-// In order to prepare for the pre-increment a PHI is added.
+// In order to prepare for the preferred instruction form, a PHI is added.
// This function will check to see if that PHI already exists and will return
-// true if it found an existing PHI with the same start and increment as the
+// true if it found an existing PHI whose start and increment match the
// one we wanted to create.
-bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI,
+bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction* MemI,
const SCEV *BasePtrStartSCEV,
- const SCEVConstant *BasePtrIncSCEV) {
+ const SCEVConstant *BasePtrIncSCEV,
+ InstrForm Form) {
BasicBlock *BB = MemI->getParent();
if (!BB)
return false;
@@ -526,12 +761,25 @@ bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI,
CurrentPHINode->getIncomingBlock(1) == PredBB) ||
(CurrentPHINode->getIncomingBlock(1) == LatchBB &&
CurrentPHINode->getIncomingBlock(0) == PredBB)) {
- if (PHIBasePtrSCEV->getStart() == BasePtrStartSCEV &&
- PHIBasePtrIncSCEV == BasePtrIncSCEV) {
+ if (PHIBasePtrIncSCEV == BasePtrIncSCEV) {
// The existing PHI (CurrentPHINode) has the same start and increment
- // as the PHI that we wanted to create.
- ++PHINodeAlreadyExists;
- return true;
+ // as the PHI that we wanted to create.
+ if (Form == UpdateForm &&
+ PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) {
+ ++PHINodeAlreadyExistsUpdate;
+ return true;
+ }
+ if (Form == DSForm || Form == DQForm) {
+ const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
+ SE->getMinusSCEV(PHIBasePtrSCEV->getStart(), BasePtrStartSCEV));
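+          // The InstrForm enumerator value is chosen to match the required
+          // displacement alignment (4 for DS form, 16 for DQ form), so a
+          // start that differs by a multiple of Form is still reachable
+          // from the existing PHI.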
+ if (Diff && !Diff->getAPInt().urem(Form)) {
+ if (Form == DSForm)
+ ++PHINodeAlreadyExistsDS;
+ else
+ ++PHINodeAlreadyExistsDQ;
+ return true;
+ }
+ }
}
}
}
@@ -539,13 +787,17 @@ bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI,
return false;
}
-bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
+bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
bool MadeChange = false;
// Only prep the innermost loop.
if (!L->empty())
return MadeChange;
+  // Return if we have already done enough preparation.
+ if (SuccPrepCount >= MaxVarsPrep)
+ return MadeChange;
+
LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n");
BasicBlock *LoopPredecessor = L->getLoopPredecessor();
@@ -562,7 +814,6 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
LLVM_DEBUG(dbgs() << "PIP fails since no predecessor for current loop.\n");
return MadeChange;
}
-
// Check if a load/store has update form. This lambda is used by
// collectCandidates, which collects candidates of the kind the lambda
// accepts.
auto isUpdateFormCandidate = [&] (const Instruction *I,
@@ -591,15 +842,53 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
}
return true;
};
+
+ // Check if a load/store has DS form.
+ auto isDSFormCandidate = [] (const Instruction *I, const Value *PtrValue) {
+ assert((PtrValue && I) && "Invalid parameter!");
+ if (isa<IntrinsicInst>(I))
+ return false;
+ Type *PointerElementType = PtrValue->getType()->getPointerElementType();
+ return (PointerElementType->isIntegerTy(64)) ||
+ (PointerElementType->isFloatTy()) ||
+ (PointerElementType->isDoubleTy()) ||
+ (PointerElementType->isIntegerTy(32) &&
+ llvm::any_of(I->users(),
+ [](const User *U) { return isa<SExtInst>(U); }));
+ };
+
+ // Check if a load/store has DQ form.
+ auto isDQFormCandidate = [&] (const Instruction *I, const Value *PtrValue) {
+ assert((PtrValue && I) && "Invalid parameter!");
+ return !isa<IntrinsicInst>(I) && ST && ST->hasP9Vector() &&
+ (PtrValue->getType()->getPointerElementType()->isVectorTy());
+ };
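+
+  // Note: DS-form instructions require a displacement that is a multiple of
+  // 4, and DQ-form instructions one that is a multiple of 16, which is why
+  // candidates are collected separately for each form below.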
// Collect buckets of comparable addresses used by loads, stores and prefetch
// intrinsic for update form.
SmallVector<Bucket, 16> UpdateFormBuckets =
- collectCandidates(L, isUpdateFormCandidate, MaxVars);
+ collectCandidates(L, isUpdateFormCandidate, MaxVarsUpdateForm);
// Prepare for update form.
if (!UpdateFormBuckets.empty())
MadeChange |= updateFormPrep(L, UpdateFormBuckets);
+ // Collect buckets of comparable addresses used by loads and stores for DS
+ // form.
+ SmallVector<Bucket, 16> DSFormBuckets =
+ collectCandidates(L, isDSFormCandidate, MaxVarsDSForm);
+
+ // Prepare for DS form.
+ if (!DSFormBuckets.empty())
+ MadeChange |= dispFormPrep(L, DSFormBuckets, DSForm);
+
+ // Collect buckets of comparable addresses used by loads and stores for DQ
+ // form.
+ SmallVector<Bucket, 16> DQFormBuckets =
+ collectCandidates(L, isDQFormCandidate, MaxVarsDQForm);
+
+ // Prepare for DQ form.
+ if (!DQFormBuckets.empty())
+ MadeChange |= dispFormPrep(L, DQFormBuckets, DQForm);
+
return MadeChange;
}
diff --git a/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp b/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
new file mode 100644
index 000000000000..83cca11b27a3
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
@@ -0,0 +1,164 @@
+//===-- PPCLowerMASSVEntries.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering of MASSV (SIMD) entries for specific PowerPC
+// subtargets.
+// The following is an example of a conversion for the Power9 subtarget:
+// __sind2_massv ---> __sind2_P9
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+
+#define DEBUG_TYPE "ppc-lower-massv-entries"
+
+using namespace llvm;
+
+namespace {
+
+// Length of the suffix "massv", which is specific to IBM MASSV library entries.
+const unsigned MASSVSuffixLength = 5;
+
+static StringRef MASSVFuncs[] = {
+#define TLI_DEFINE_MASSV_VECFUNCS_NAMES
+#include "llvm/Analysis/VecFuncs.def"
+};
+
+class PPCLowerMASSVEntries : public ModulePass {
+public:
+ static char ID;
+
+ PPCLowerMASSVEntries() : ModulePass(ID) {}
+
+ bool runOnModule(Module &M) override;
+
+ StringRef getPassName() const override { return "PPC Lower MASS Entries"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+
+private:
+ static bool isMASSVFunc(StringRef Name);
+ static StringRef getCPUSuffix(const PPCSubtarget *Subtarget);
+ static std::string createMASSVFuncName(Function &Func,
+ const PPCSubtarget *Subtarget);
+ bool lowerMASSVCall(CallInst *CI, Function &Func, Module &M,
+ const PPCSubtarget *Subtarget);
+};
+
+} // namespace
+
+/// Checks if the specified function name represents an entry in the MASSV
+/// library.
+bool PPCLowerMASSVEntries::isMASSVFunc(StringRef Name) {
+ auto Iter = std::find(std::begin(MASSVFuncs), std::end(MASSVFuncs), Name);
+ return Iter != std::end(MASSVFuncs);
+}
+
+// FIXME:
+/// Returns a string corresponding to the specified PowerPC subtarget. e.g.:
+/// "P8" for Power8, "P9" for Power9. The string is used as a suffix while
+/// generating subtarget-specific MASSV library functions. Current support
+/// includes Power8 and Power9 subtargets.
+StringRef PPCLowerMASSVEntries::getCPUSuffix(const PPCSubtarget *Subtarget) {
+ // Assume Power8 when Subtarget is unavailable.
+ if (!Subtarget)
+ return "P8";
+ if (Subtarget->hasP9Vector())
+ return "P9";
+ if (Subtarget->hasP8Vector())
+ return "P8";
+
+ report_fatal_error("Unsupported Subtarget: MASSV is supported only on "
+ "Power8 and Power9 subtargets.");
+}
+
+/// Creates the PowerPC subtarget-specific name corresponding to the specified
+/// generic MASSV function and subtarget.
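+/// e.g. "__sind2_massv" drops the 5-character "massv" suffix and appends the
+/// CPU suffix, yielding "__sind2_P9" on a Power9 subtarget.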
+std::string
+PPCLowerMASSVEntries::createMASSVFuncName(Function &Func,
+ const PPCSubtarget *Subtarget) {
+ StringRef Suffix = getCPUSuffix(Subtarget);
+ auto GenericName = Func.getName().drop_back(MASSVSuffixLength).str();
+ std::string MASSVEntryName = GenericName + Suffix.str();
+ return MASSVEntryName;
+}
+
+/// Lowers generic MASSV entries to PowerPC subtarget-specific MASSV entries.
+/// e.g.: __sind2_massv --> __sind2_P9 for a Power9 subtarget.
+/// Both function prototypes and their callsites are updated during lowering.
+bool PPCLowerMASSVEntries::lowerMASSVCall(CallInst *CI, Function &Func,
+ Module &M,
+ const PPCSubtarget *Subtarget) {
+ if (CI->use_empty())
+ return false;
+
+ std::string MASSVEntryName = createMASSVFuncName(Func, Subtarget);
+ FunctionCallee FCache = M.getOrInsertFunction(
+ MASSVEntryName, Func.getFunctionType(), Func.getAttributes());
+
+ CallSite CS(CI);
+ CI->setCalledFunction(FCache);
+
+ return true;
+}
+
+bool PPCLowerMASSVEntries::runOnModule(Module &M) {
+ bool Changed = false;
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return Changed;
+
+ auto &TM = TPC->getTM<PPCTargetMachine>();
+ const PPCSubtarget *Subtarget;
+
+ for (Function &Func : M) {
+ if (!Func.isDeclaration())
+ continue;
+
+ if (!isMASSVFunc(Func.getName()))
+ continue;
+
+    // A call to lowerMASSVCall() invalidates the iterator over users when it
+    // replaces them, so precompute the current list of users in order to
+    // rewrite every call site.
+ SmallVector<User *, 4> MASSVUsers;
+ for (auto *User: Func.users())
+ MASSVUsers.push_back(User);
+
+ for (auto *User : MASSVUsers) {
+ auto *CI = dyn_cast<CallInst>(User);
+ if (!CI)
+ continue;
+
+ Subtarget = &TM.getSubtarget<PPCSubtarget>(*CI->getParent()->getParent());
+ Changed |= lowerMASSVCall(CI, Func, M, Subtarget);
+ }
+ }
+
+ return Changed;
+}
+
+char PPCLowerMASSVEntries::ID = 0;
+
+char &llvm::PPCLowerMASSVEntriesID = PPCLowerMASSVEntries::ID;
+
+INITIALIZE_PASS(PPCLowerMASSVEntries, DEBUG_TYPE, "Lower MASSV entries", false,
+ false)
+
+ModulePass *llvm::createPPCLowerMASSVEntriesPass() {
+ return new PPCLowerMASSVEntries();
+}
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index ac8ac060f460..74192cb20cd0 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -18,6 +18,8 @@
//
//===---------------------------------------------------------------------===//
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
@@ -26,12 +28,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
-#include "MCTargetDesc/PPCPredicates.h"
using namespace llvm;
@@ -160,33 +162,33 @@ static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
static unsigned
getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
unsigned Opcode = MI->getOpcode();
- if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
- Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo)
+ if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
+ Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
return MI->getOperand(3).getImm();
- if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
- MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
+ if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
+ MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
return MI->getOperand(3).getImm();
- if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo ||
- Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo ||
+ if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
+ Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
- MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
+ MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
return 32 + MI->getOperand(3).getImm();
- if (Opcode == PPC::ANDIo) {
+ if (Opcode == PPC::ANDI_rec) {
uint16_t Imm = MI->getOperand(2).getImm();
return 48 + countLeadingZeros(Imm);
}
- if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo ||
- Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo ||
+ if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
+ Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
// The result ranges from 0 to 32.
return 58;
- if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo ||
- Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo)
+ if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
+ Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
// The result ranges from 0 to 64.
return 57;
@@ -331,108 +333,121 @@ bool PPCMIPeephole::simplifyCode(void) {
// is identified by an immediate value of 0 or 3.
int Immed = MI.getOperand(3).getImm();
- if (Immed != 1) {
-
- // For each of these simplifications, we need the two source
- // regs to match. Unfortunately, MachineCSE ignores COPY and
- // SUBREG_TO_REG, so for example we can see
- // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
- // We have to look through chains of COPY and SUBREG_TO_REG
- // to find the real source values for comparison.
- unsigned TrueReg1 =
- TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
- unsigned TrueReg2 =
- TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
-
- if (TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1)) {
- MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
- unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;
-
- // If this is a splat fed by a splatting load, the splat is
- // redundant. Replace with a copy. This doesn't happen directly due
- // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
- // a load of a double to a vector of 64-bit integers.
- auto isConversionOfLoadAndSplat = [=]() -> bool {
- if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
- return false;
- unsigned DefReg =
- TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
- if (Register::isVirtualRegister(DefReg)) {
- MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
- if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
- return true;
- }
- return false;
- };
- if (DefMI && (Immed == 0 || Immed == 3)) {
- if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
- LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
- "to load-and-splat/copy: ");
- LLVM_DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
- MI.getOperand(0).getReg())
- .add(MI.getOperand(1));
- ToErase = &MI;
- Simplified = true;
- }
- }
+ if (Immed == 1)
+ break;
- // If this is a splat or a swap fed by another splat, we
- // can replace it with a copy.
- if (DefOpc == PPC::XXPERMDI) {
- unsigned FeedImmed = DefMI->getOperand(3).getImm();
- unsigned FeedReg1 =
- TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
- unsigned FeedReg2 =
- TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
-
- if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
- LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
- "to splat/copy: ");
- LLVM_DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
- MI.getOperand(0).getReg())
- .add(MI.getOperand(1));
- ToErase = &MI;
- Simplified = true;
- }
-
- // If this is a splat fed by a swap, we can simplify modify
- // the splat to splat the other value from the swap's input
- // parameter.
- else if ((Immed == 0 || Immed == 3)
- && FeedImmed == 2 && FeedReg1 == FeedReg2) {
- LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
- LLVM_DEBUG(MI.dump());
- MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
- MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
- MI.getOperand(3).setImm(3 - Immed);
- Simplified = true;
- }
-
- // If this is a swap fed by a swap, we can replace it
- // with a copy from the first swap's input.
- else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
- LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
- LLVM_DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
- MI.getOperand(0).getReg())
- .add(DefMI->getOperand(1));
- ToErase = &MI;
- Simplified = true;
- }
- } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
- (DefMI->getOperand(2).getImm() == 0 ||
- DefMI->getOperand(2).getImm() == 3)) {
- // Splat fed by another splat - switch the output of the first
- // and remove the second.
- DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
- ToErase = &MI;
- Simplified = true;
- LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
- LLVM_DEBUG(MI.dump());
- }
+ // For each of these simplifications, we need the two source
+ // regs to match. Unfortunately, MachineCSE ignores COPY and
+ // SUBREG_TO_REG, so for example we can see
+ // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
+ // We have to look through chains of COPY and SUBREG_TO_REG
+ // to find the real source values for comparison.
+ unsigned TrueReg1 =
+ TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
+ unsigned TrueReg2 =
+ TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
+
+ if (!(TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1)))
+ break;
+
+ MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
+
+ if (!DefMI)
+ break;
+
+ unsigned DefOpc = DefMI->getOpcode();
+
+ // If this is a splat fed by a splatting load, the splat is
+ // redundant. Replace with a copy. This doesn't happen directly due
+ // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
+ // a load of a double to a vector of 64-bit integers.
+ auto isConversionOfLoadAndSplat = [=]() -> bool {
+ if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
+ return false;
+ unsigned FeedReg1 =
+ TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
+ if (Register::isVirtualRegister(FeedReg1)) {
+ MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1);
+ if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
+ return true;
+ }
+ return false;
+ };
+ if ((Immed == 0 || Immed == 3) &&
+ (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
+ LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
+ "to load-and-splat/copy: ");
+ LLVM_DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
+ }
+
+ // If this is a splat or a swap fed by another splat, we
+ // can replace it with a copy.
+ if (DefOpc == PPC::XXPERMDI) {
+ unsigned DefReg1 = DefMI->getOperand(1).getReg();
+ unsigned DefReg2 = DefMI->getOperand(2).getReg();
+ unsigned DefImmed = DefMI->getOperand(3).getImm();
+
+ // If the two inputs are not the same register, check to see if
+ // they originate from the same virtual register after only
+ // copy-like instructions.
+ if (DefReg1 != DefReg2) {
+ unsigned FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
+ unsigned FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
+
+ if (!(FeedReg1 == FeedReg2 &&
+ Register::isVirtualRegister(FeedReg1)))
+ break;
+ }
+
+ if (DefImmed == 0 || DefImmed == 3) {
+ LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
+ "to splat/copy: ");
+ LLVM_DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
}
+
+          // If this is a splat fed by a swap, we can simply modify the splat
+          // to splat the other value from the swap's input parameter: with
+          // identical inputs, an XXPERMDI immediate of 0 splats one
+          // doubleword and 3 splats the other, hence the 3 - Immed below.
+ else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
+ LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
+ LLVM_DEBUG(MI.dump());
+ MI.getOperand(1).setReg(DefReg1);
+ MI.getOperand(2).setReg(DefReg2);
+ MI.getOperand(3).setImm(3 - Immed);
+ Simplified = true;
+ }
+
+ // If this is a swap fed by a swap, we can replace it
+ // with a copy from the first swap's input.
+ else if (Immed == 2 && DefImmed == 2) {
+ LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
+ LLVM_DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(DefMI->getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
+ }
+ } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
+ (DefMI->getOperand(2).getImm() == 0 ||
+ DefMI->getOperand(2).getImm() == 3)) {
+ // Splat fed by another splat - switch the output of the first
+ // and remove the second.
+ DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
+ ToErase = &MI;
+ Simplified = true;
+ LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
+ LLVM_DEBUG(MI.dump());
}
break;
}
@@ -805,6 +820,153 @@ bool PPCMIPeephole::simplifyCode(void) {
combineSEXTAndSHL(MI, ToErase);
break;
}
+ case PPC::RLWINM:
+ case PPC::RLWINM_rec:
+ case PPC::RLWINM8:
+ case PPC::RLWINM8_rec: {
+ unsigned FoldingReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(FoldingReg))
+ break;
+
+ MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
+ if (SrcMI->getOpcode() != PPC::RLWINM &&
+ SrcMI->getOpcode() != PPC::RLWINM_rec &&
+ SrcMI->getOpcode() != PPC::RLWINM8 &&
+ SrcMI->getOpcode() != PPC::RLWINM8_rec)
+ break;
+ assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
+ MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
+ SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
+ "Invalid PPC::RLWINM Instruction!");
+ uint64_t SHSrc = SrcMI->getOperand(2).getImm();
+ uint64_t SHMI = MI.getOperand(2).getImm();
+ uint64_t MBSrc = SrcMI->getOperand(3).getImm();
+ uint64_t MBMI = MI.getOperand(3).getImm();
+ uint64_t MESrc = SrcMI->getOperand(4).getImm();
+ uint64_t MEMI = MI.getOperand(4).getImm();
+
+ assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
+ "Invalid PPC::RLWINM Instruction!");
+
+      // If MBMI is bigger than MEMI, we can never get a run of ones.
+ // RotatedSrcMask non-wrap:
+ // 0........31|32........63
+ // RotatedSrcMask: B---E B---E
+ // MaskMI: -----------|--E B------
+ // Result: ----- --- (Bad candidate)
+ //
+ // RotatedSrcMask wrap:
+ // 0........31|32........63
+ // RotatedSrcMask: --E B----|--E B----
+ // MaskMI: -----------|--E B------
+ // Result: --- -----|--- ----- (Bad candidate)
+ //
+ // One special case is RotatedSrcMask is a full set mask.
+ // RotatedSrcMask full:
+ // 0........31|32........63
+ // RotatedSrcMask: ------EB---|-------EB---
+ // MaskMI: -----------|--E B------
+ // Result: -----------|--- ------- (Good candidate)
+
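+      // A worked illustration (editorial example, not from the original
+      // patch):
+      //   x1 = RLWINM x0, 4, 0, 27    ; MaskSrc = 0xFFFFFFF0
+      //   x2 = RLWINM x1, 4, 28, 31   ; MaskMI  = 0x0000000F
+      // RotatedSrcMask = rotl32(0xFFFFFFF0, 4) = 0xFFFFFF0F, so FinalMask =
+      // 0xFFFFFF0F & 0x0000000F = 0x0000000F, a run of ones, and the pair
+      // folds to:
+      //   x2 = RLWINM x0, 8, 28, 31   ; NewSH = (4 + 4) % 32
+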
+ // Mark special case.
+ bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
+
+ // For other MBMI > MEMI cases, just return.
+ if ((MBMI > MEMI) && !SrcMaskFull)
+ break;
+
+ // Handle MBMI <= MEMI cases.
+ APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
+      // In MI we only need the low 32 bits of SrcMI, so only the low 32 bits
+      // of the SrcMI mask are considered. Note that in APInt the lowest bit
+      // is at index 0, while in the PowerPC ISA it is at index 63.
+ APInt MaskSrc =
+ APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
+      // The current APInt::getBitsSetWithWrap returns an all-zero mask when
+      // loBit equals highBit, but when MBSrc - MESrc == 1 we expect a fully
+      // set mask instead of a null one.
+ if (SrcMaskFull && (MBSrc - MESrc == 1))
+ MaskSrc.setAllBits();
+
+ APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
+ APInt FinalMask = RotatedSrcMask & MaskMI;
+ uint32_t NewMB, NewME;
+
+ // If final mask is 0, MI result should be 0 too.
+ if (FinalMask.isNullValue()) {
+ bool Is64Bit = (MI.getOpcode() == PPC::RLWINM8 ||
+ MI.getOpcode() == PPC::RLWINM8_rec);
+
+ Simplified = true;
+
+ LLVM_DEBUG(dbgs() << "Replace Instr: ");
+ LLVM_DEBUG(MI.dump());
+
+ if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
+ // Replace MI with "LI 0"
+ MI.RemoveOperand(4);
+ MI.RemoveOperand(3);
+ MI.RemoveOperand(2);
+ MI.getOperand(1).ChangeToImmediate(0);
+ MI.setDesc(TII->get(Is64Bit ? PPC::LI8 : PPC::LI));
+ } else {
+ // Replace MI with "ANDI_rec reg, 0"
+ MI.RemoveOperand(4);
+ MI.RemoveOperand(3);
+ MI.getOperand(2).setImm(0);
+ MI.setDesc(TII->get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
+ MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+ if (SrcMI->getOperand(1).isKill()) {
+ MI.getOperand(1).setIsKill(true);
+ SrcMI->getOperand(1).setIsKill(false);
+ } else
+ // About to replace MI.getOperand(1), clear its kill flag.
+ MI.getOperand(1).setIsKill(false);
+ }
+
+ LLVM_DEBUG(dbgs() << "With: ");
+ LLVM_DEBUG(MI.dump());
+ } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB,
+                  NewME) && NewMB <= NewME) || SrcMaskFull) {
+        // Here we only handle the MBMI <= MEMI case, so NewMB must be no
+        // bigger than NewME. Otherwise we would get a 64-bit value after
+        // folding, while MI returns a 32-bit value.
+
+ Simplified = true;
+ LLVM_DEBUG(dbgs() << "Converting Instr: ");
+ LLVM_DEBUG(MI.dump());
+
+ uint16_t NewSH = (SHSrc + SHMI) % 32;
+ MI.getOperand(2).setImm(NewSH);
+ // If SrcMI mask is full, no need to update MBMI and MEMI.
+ if (!SrcMaskFull) {
+ MI.getOperand(3).setImm(NewMB);
+ MI.getOperand(4).setImm(NewME);
+ }
+ MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+ if (SrcMI->getOperand(1).isKill()) {
+ MI.getOperand(1).setIsKill(true);
+ SrcMI->getOperand(1).setIsKill(false);
+ } else
+ // About to replace MI.getOperand(1), clear its kill flag.
+ MI.getOperand(1).setIsKill(false);
+
+ LLVM_DEBUG(dbgs() << "To: ");
+ LLVM_DEBUG(MI.dump());
+ }
+ if (Simplified) {
+        // If FoldingReg has no non-debug use and SrcMI has no implicit def
+        // (i.e. it is not RLWINM_rec or RLWINM8_rec), it is safe to delete
+        // SrcMI. Otherwise keep it.
+ ++NumRotatesCollapsed;
+ if (MRI->use_nodbg_empty(FoldingReg) && !SrcMI->hasImplicitDef()) {
+ ToErase = SrcMI;
+ LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
+ LLVM_DEBUG(SrcMI->dump());
+ }
+ }
+ break;
+ }
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index dfae19804d94..2b341b5952c8 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -42,7 +42,7 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current
/// function. This is only valid after the initial scan of the function by
/// PEI.
- bool MustSaveLR;
+ bool MustSaveLR = false;
/// MustSaveTOC - Indicates that the TOC save needs to be performed in the
/// prologue of the function. This is typically the case when there are
diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index b1c0433641dd..a4b4bf2973d1 100644
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -35,6 +35,8 @@ STATISTIC(NumRemovedInPreEmit,
"Number of instructions deleted in pre-emit peephole");
STATISTIC(NumberOfSelfCopies,
"Number of self copy instructions eliminated");
+STATISTIC(NumFrameOffFoldInPreEmit,
+          "Number of frame offsets folded by using r+r form in pre-emit peephole");
static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
@@ -161,8 +163,19 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &MF) override {
- if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole)
+ if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
+ // Remove UNENCODED_NOP even when this pass is disabled.
+ // This needs to be done unconditionally so we don't emit zeros
+ // in the instruction stream.
+ SmallVector<MachineInstr *, 4> InstrsToErase;
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineInstr &MI : MBB)
+ if (MI.getOpcode() == PPC::UNENCODED_NOP)
+ InstrsToErase.push_back(&MI);
+ for (MachineInstr *MI : InstrsToErase)
+ MI->eraseFromParent();
return false;
+ }
bool Changed = false;
const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
@@ -171,6 +184,10 @@ namespace {
Changed |= removeRedundantLIs(MBB, TRI);
for (MachineInstr &MI : MBB) {
unsigned Opc = MI.getOpcode();
+ if (Opc == PPC::UNENCODED_NOP) {
+ InstrsToErase.push_back(&MI);
+ continue;
+ }
// Detect self copies - these can result from running AADB.
if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
const MCInstrDesc &MCID = TII->get(Opc);
@@ -202,6 +219,12 @@ namespace {
InstrsToErase.push_back(DefMIToErase);
}
}
+ if (TII->foldFrameOffset(MI)) {
+ Changed = true;
+ NumFrameOffFoldInPreEmit++;
+ LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
+ LLVM_DEBUG(MI.dump());
+ }
}
// Eliminate conditional branch based on a constant CR bit by
diff --git a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
index 3b71ed219c17..90cc81beb89d 100644
--- a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
+++ b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -375,10 +376,10 @@ public:
};
private:
- const PPCInstrInfo *TII;
- MachineFunction *MF;
- MachineRegisterInfo *MRI;
- const MachineBranchProbabilityInfo *MBPI;
+ const PPCInstrInfo *TII = nullptr;
+ MachineFunction *MF = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const MachineBranchProbabilityInfo *MBPI = nullptr;
// A vector to contain all the CR logical operations
SmallVector<CRLogicalOpInfo, 16> AllCRLogicalOps;
@@ -470,21 +471,21 @@ PPCReduceCRLogicals::createCRLogicalOpInfo(MachineInstr &MIParam) {
} else {
MachineInstr *Def1 = lookThroughCRCopy(MIParam.getOperand(1).getReg(),
Ret.SubregDef1, Ret.CopyDefs.first);
+ assert(Def1 && "Must be able to find a definition of operand 1.");
Ret.DefsSingleUse &=
MRI->hasOneNonDBGUse(Def1->getOperand(0).getReg());
Ret.DefsSingleUse &=
MRI->hasOneNonDBGUse(Ret.CopyDefs.first->getOperand(0).getReg());
- assert(Def1 && "Must be able to find a definition of operand 1.");
if (isBinary(MIParam)) {
Ret.IsBinary = 1;
MachineInstr *Def2 = lookThroughCRCopy(MIParam.getOperand(2).getReg(),
Ret.SubregDef2,
Ret.CopyDefs.second);
+ assert(Def2 && "Must be able to find a definition of operand 2.");
Ret.DefsSingleUse &=
MRI->hasOneNonDBGUse(Def2->getOperand(0).getReg());
Ret.DefsSingleUse &=
MRI->hasOneNonDBGUse(Ret.CopyDefs.second->getOperand(0).getReg());
- assert(Def2 && "Must be able to find a definition of operand 2.");
Ret.TrueDefs = std::make_pair(Def1, Def2);
} else {
Ret.TrueDefs = std::make_pair(Def1, nullptr);
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 9ec26a19bdaa..01b97ba6ab20 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -380,7 +380,7 @@ bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) co
// This is either:
// 1) A fixed frame index object which we know are aligned so
// as long as we have a valid DForm/DSForm/DQForm (non XForm) we don't
- // need to consider the alignement here.
+ // need to consider the alignment here.
// 2) A non-fixed object, but in that case the previous check tells us that
// the minimum required alignment is no more than 1.
if (InstrInfo->isXFormMemOp(Opcode))
@@ -747,12 +747,18 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
Register SrcReg = MI.getOperand(0).getReg();
// Search up the BB to find the definition of the CR bit.
- MachineBasicBlock::reverse_iterator Ins;
+ MachineBasicBlock::reverse_iterator Ins = MI;
+ MachineBasicBlock::reverse_iterator Rend = MBB.rend();
+ ++Ins;
unsigned CRBitSpillDistance = 0;
- for (Ins = MI; Ins != MBB.rend(); Ins++) {
+ bool SeenUse = false;
+ for (; Ins != Rend; ++Ins) {
// Definition found.
if (Ins->modifiesRegister(SrcReg, TRI))
break;
+ // Use found.
+ if (Ins->readsRegister(SrcReg, TRI))
+ SeenUse = true;
// Unable to find CR bit definition within maximum search distance.
if (CRBitSpillDistance == MaxCRBitSpillDist) {
Ins = MI;
@@ -767,17 +773,35 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
if (Ins == MBB.rend())
Ins = MI;
+ bool SpillsKnownBit = false;
// There is no need to extract the CR bit if its value is already known.
switch (Ins->getOpcode()) {
case PPC::CRUNSET:
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LI8 : PPC::LI), Reg)
.addImm(0);
+ SpillsKnownBit = true;
break;
case PPC::CRSET:
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LIS8 : PPC::LIS), Reg)
.addImm(-32768);
+ SpillsKnownBit = true;
break;
default:
+ // On Power9, we can use SETB to extract the LT bit. This only works for
+ // the LT bit since SETB produces -1/1/0 for LT/GT/<neither>. So the value
+ // of the bit we care about (32-bit sign bit) will be set to the value of
+ // the LT bit (regardless of the other bits in the CR field).
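+    // Illustration: spilling CR2LT on a 64-bit Power9 target can emit
+    // "setb rN, cr2"; SETB writes -1 exactly when LT is set, so the sign
+    // bit of rN carries the spilled bit.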
+ if (Subtarget.isISA3_0()) {
+ if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR1LT ||
+ SrcReg == PPC::CR2LT || SrcReg == PPC::CR3LT ||
+ SrcReg == PPC::CR4LT || SrcReg == PPC::CR5LT ||
+ SrcReg == PPC::CR6LT || SrcReg == PPC::CR7LT) {
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETB8 : PPC::SETB), Reg)
+ .addReg(getCRFromCRBit(SrcReg), RegState::Undef);
+ break;
+ }
+ }
+
// We need to move the CR field that contains the CR bit we are spilling.
// The super register may not be explicitly defined (i.e. it can be defined
// by a CR-logical that only defines the subreg) so we state that the CR
@@ -803,8 +827,13 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
.addReg(Reg, RegState::Kill),
FrameIndex);
+ bool KillsCRBit = MI.killsRegister(SrcReg, TRI);
// Discard the pseudo instruction.
MBB.erase(II);
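+  // If the spilled bit had a statically known value (CRSET/CRUNSET), the
+  // spill killed it, and nothing read it between the def and the spill, the
+  // defining instruction is now dead: turn it into an UNENCODED_NOP that the
+  // pre-emit peephole deletes later.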
+ if (SpillsKnownBit && KillsCRBit && !SeenUse) {
+ Ins->setDesc(TII.get(PPC::UNENCODED_NOP));
+ Ins->RemoveOperand(0);
+ }
}
void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index a50e05920cd4..a5fbb0c6ec64 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -61,6 +61,15 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
public:
PPCRegisterInfo(const PPCTargetMachine &TM);
+ /// getMappedIdxOpcForImmOpc - Return the mapped index form load/store opcode
+  /// for a given imm form load/store opcode \p ImmOpcode.
+ /// FIXME: move this to PPCInstrInfo class.
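+  /// For example, PPC::LD maps to its X-form (base + index register)
+  /// counterpart PPC::LDX, assuming the pair is present in ImmToIdxMap.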
+ unsigned getMappedIdxOpcForImmOpc(unsigned ImmOpcode) const {
+ if (!ImmToIdxMap.count(ImmOpcode))
+ return PPC::INSTRUCTION_LIST_END;
+ return ImmToIdxMap.find(ImmOpcode)->second;
+ }
+
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 10568ed4b655..0997f68bd999 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -61,7 +61,7 @@ PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU,
void PPCSubtarget::initializeEnvironment() {
StackAlignment = Align(16);
- DarwinDirective = PPC::DIR_NONE;
+ CPUDirective = PPC::DIR_NONE;
HasMFOCRF = false;
Has64BitSupport = false;
Use64BitRegs = false;
@@ -100,6 +100,7 @@ void PPCSubtarget::initializeEnvironment() {
IsPPC6xx = false;
IsE500 = false;
FeatureMFTB = false;
+ AllowsUnalignedFPAccess = false;
DeprecatedDST = false;
HasLazyResolverStubs = false;
HasICBT = false;
@@ -126,6 +127,8 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// If cross-compiling with -march=ppc64le without -mcpu
if (TargetTriple.getArch() == Triple::ppc64le)
CPUName = "ppc64le";
+ else if (TargetTriple.getSubArch() == Triple::PPCSubArch_spe)
+ CPUName = "e500";
else
CPUName = "generic";
}
@@ -190,7 +193,7 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const {
bool PPCSubtarget::enableMachineScheduler() const { return true; }
bool PPCSubtarget::enableMachinePipeliner() const {
- return (DarwinDirective == PPC::DIR_PWR9) && EnableMachinePipeliner;
+ return (CPUDirective == PPC::DIR_PWR9) && EnableMachinePipeliner;
}
bool PPCSubtarget::useDFAforSMS() const { return false; }
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index d96c2893aee9..044e982740e9 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -57,6 +57,7 @@ namespace PPC {
DIR_PWR7,
DIR_PWR8,
DIR_PWR9,
+ DIR_PWR_FUTURE,
DIR_64
};
}
@@ -84,7 +85,7 @@ protected:
InstrItineraryData InstrItins;
/// Which cpu directive was used.
- unsigned DarwinDirective;
+ unsigned CPUDirective;
/// Used by the ISel to turn in optimizations for POWER4-derived architectures
bool HasMFOCRF;
@@ -123,6 +124,7 @@ protected:
bool IsPPC4xx;
bool IsPPC6xx;
bool FeatureMFTB;
+ bool AllowsUnalignedFPAccess;
bool DeprecatedDST;
bool HasLazyResolverStubs;
bool IsLittleEndian;
@@ -169,8 +171,11 @@ public:
Align getStackAlignment() const { return StackAlignment; }
/// getDarwinDirective - Returns the -m directive specified for the cpu.
+ unsigned getDarwinDirective() const { return CPUDirective; }
+
+ /// getCPUDirective - Returns the -m directive specified for the cpu.
///
- unsigned getDarwinDirective() const { return DarwinDirective; }
+ unsigned getCPUDirective() const { return CPUDirective; }
/// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
@@ -270,6 +275,7 @@ public:
bool vectorsUseTwoUnits() const {return VectorsUseTwoUnits; }
bool isE500() const { return IsE500; }
bool isFeatureMFTB() const { return FeatureMFTB; }
+ bool allowsUnalignedFPAccess() const { return AllowsUnalignedFPAccess; }
bool isDeprecatedDST() const { return DeprecatedDST; }
bool hasICBT() const { return HasICBT; }
bool hasInvariantFunctionDescriptors() const {
@@ -347,6 +353,41 @@ public:
/// True if the GV will be accessed via an indirect symbol.
bool isGVIndirectSymbol(const GlobalValue *GV) const;
+ /// True if the ABI is descriptor based.
+ bool usesFunctionDescriptors() const {
+ // Both 32-bit and 64-bit AIX are descriptor based. For ELF only the 64-bit
+ // v1 ABI uses descriptors.
+ return isAIXABI() || (is64BitELFABI() && !isELFv2ABI());
+ }
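+
+  // A function descriptor is laid out as { entry-point address, TOC anchor,
+  // environment pointer }, giving field offsets 0/8/16 on 64-bit targets and
+  // 0/4/8 on 32-bit AIX, as returned by the helpers below.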
+
+ unsigned descriptorTOCAnchorOffset() const {
+ assert(usesFunctionDescriptors() &&
+ "Should only be called when the target uses descriptors.");
+ return IsPPC64 ? 8 : 4;
+ }
+
+ unsigned descriptorEnvironmentPointerOffset() const {
+ assert(usesFunctionDescriptors() &&
+ "Should only be called when the target uses descriptors.");
+ return IsPPC64 ? 16 : 8;
+ }
+
+ MCRegister getEnvironmentPointerRegister() const {
+ assert(usesFunctionDescriptors() &&
+ "Should only be called when the target uses descriptors.");
+ return IsPPC64 ? PPC::X11 : PPC::R11;
+ }
+
+ MCRegister getTOCPointerRegister() const {
+ assert((is64BitELFABI() || isAIXABI()) &&
+ "Should only be called when the target is a TOC based ABI.");
+ return IsPPC64 ? PPC::X2 : PPC::R2;
+ }
+
+ MCRegister getStackPointerRegister() const {
+ return IsPPC64 ? PPC::X1 : PPC::R1;
+ }
+
bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; }
};
} // End llvm namespace
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 8f313d9d01c4..17e1196eea59 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index abefee8b339d..2caf4c99a1f8 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -51,8 +51,8 @@ opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
cl::desc("Disable CTR loops for PPC"));
static cl::
-opt<bool> DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden,
- cl::desc("Disable PPC loop preinc prep"));
+opt<bool> DisableInstrFormPrep("disable-ppc-instr-form-prep", cl::Hidden,
+ cl::desc("Disable PPC loop instr form prep"));
static cl::opt<bool>
VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
@@ -77,7 +77,7 @@ EnableGEPOpt("ppc-gep-opt", cl::Hidden,
static cl::opt<bool>
EnablePrefetch("enable-ppc-prefetching",
- cl::desc("disable software prefetching on PPC"),
+ cl::desc("enable software prefetching on PPC"),
cl::init(false), cl::Hidden);
static cl::opt<bool>
@@ -94,7 +94,7 @@ static cl::opt<bool>
ReduceCRLogical("ppc-reduce-cr-logicals",
cl::desc("Expand eligible cr-logical binary ops to branches"),
cl::init(true), cl::Hidden);
-extern "C" void LLVMInitializePowerPCTarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
RegisterTargetMachine<PPCTargetMachine> B(getThePPC64Target());
@@ -104,7 +104,7 @@ extern "C" void LLVMInitializePowerPCTarget() {
#ifndef NDEBUG
initializePPCCTRLoopsVerifyPass(PR);
#endif
- initializePPCLoopPreIncPrepPass(PR);
+ initializePPCLoopInstrFormPrepPass(PR);
initializePPCTOCRegDepsPass(PR);
initializePPCEarlyReturnPass(PR);
initializePPCVSXCopyPass(PR);
@@ -119,6 +119,7 @@ extern "C" void LLVMInitializePowerPCTarget() {
initializePPCPreEmitPeepholePass(PR);
initializePPCTLSDynamicCallPass(PR);
initializePPCMIPeepholePass(PR);
+ initializePPCLowerMASSVEntriesPass(PR);
}
/// Return the datalayout string of a subtarget.
@@ -214,8 +215,6 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
case Triple::ppc64le:
return PPCTargetMachine::PPC_ABI_ELFv2;
case Triple::ppc64:
- if (TT.getEnvironment() == llvm::Triple::ELFv2)
- return PPCTargetMachine::PPC_ABI_ELFv2;
return PPCTargetMachine::PPC_ABI_ELFv1;
default:
return PPCTargetMachine::PPC_ABI_UNKNOWN;
@@ -401,6 +400,9 @@ void PPCPassConfig::addIRPasses() {
addPass(createPPCBoolRetToIntPass());
addPass(createAtomicExpandPass());
+ // Lower generic MASSV routines to PowerPC subtarget-specific entries.
+ addPass(createPPCLowerMASSVEntriesPass());
+
// For the BG/Q (or if explicitly requested), add explicit data prefetch
// intrinsics.
bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
@@ -427,8 +429,8 @@ void PPCPassConfig::addIRPasses() {
}
bool PPCPassConfig::addPreISel() {
- if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None)
- addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
+ if (!DisableInstrFormPrep && getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCLoopInstrFormPrepPass(getPPCTargetMachine()));
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
addPass(createHardwareLoopsPass());
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index f51300c656aa..e05699cc95ec 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -84,10 +84,10 @@ int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return 4 * TTI::TCC_Basic;
}
-int PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) {
+int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
if (DisablePPCConstHoist)
- return BaseT::getIntImmCost(IID, Idx, Imm, Ty);
+ return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty);
assert(Ty->isIntegerTy());
@@ -118,10 +118,10 @@ int PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
return PPCTTIImpl::getIntImmCost(Imm, Ty);
}
-int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) {
+int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
if (DisablePPCConstHoist)
- return BaseT::getIntImmCost(Opcode, Idx, Imm, Ty);
+ return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty);
assert(Ty->isIntegerTy());
@@ -283,24 +283,6 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
case Intrinsic::loop_decrement:
return true;
-// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp) && \
- !defined(setjmp_undefined_for_msvc)
-# pragma push_macro("setjmp")
-# undef setjmp
-# define setjmp_undefined_for_msvc
-#endif
-
- case Intrinsic::setjmp:
-
-#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
- // let's return it to _setjmp state
-# pragma pop_macro("setjmp")
-# undef setjmp_undefined_for_msvc
-#endif
-
- case Intrinsic::longjmp:
-
// Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
// because, although it does clobber the counter register, the
// control can't then return to inside the loop unless there is also
@@ -331,8 +313,12 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::lrint: Opcode = ISD::LRINT; break;
+ case Intrinsic::llrint: Opcode = ISD::LLRINT; break;
case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
case Intrinsic::round: Opcode = ISD::FROUND; break;
+ case Intrinsic::lround: Opcode = ISD::LROUND; break;
+ case Intrinsic::llround: Opcode = ISD::LLROUND; break;
case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
@@ -550,7 +536,7 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
- if (ST->getDarwinDirective() == PPC::DIR_A2) {
+ if (ST->getCPUDirective() == PPC::DIR_A2) {
// The A2 is in-order with a deep pipeline, and concatenation unrolling
// helps expose latency-hiding opportunities to the instruction scheduler.
UP.Partial = UP.Runtime = true;
@@ -576,7 +562,7 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
// on combining the loads generated for consecutive accesses, and failure to
// do so is particularly expensive. This makes it much more likely (compared
// to only using concatenation unrolling).
- if (ST->getDarwinDirective() == PPC::DIR_A2)
+ if (ST->getCPUDirective() == PPC::DIR_A2)
return true;
return LoopHasReductions;
@@ -598,8 +584,8 @@ unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
assert(ClassID == GPRRC || ClassID == FPRRC ||
ClassID == VRRC || ClassID == VSXRC);
if (ST->hasVSX()) {
- assert(ClassID == GPRRC || ClassID == VSXRC);
- return ClassID == GPRRC ? 32 : 64;
+ assert(ClassID == GPRRC || ClassID == VSXRC || ClassID == VRRC);
+ return ClassID == VSXRC ? 64 : 32;
}
assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC);
return 32;
@@ -608,8 +594,14 @@ unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const {
if (Vector)
return ST->hasVSX() ? VSXRC : VRRC;
- else if (Ty && Ty->getScalarType()->isFloatTy())
+ else if (Ty && (Ty->getScalarType()->isFloatTy() ||
+ Ty->getScalarType()->isDoubleTy()))
return ST->hasVSX() ? VSXRC : FPRRC;
+ else if (Ty && (Ty->getScalarType()->isFP128Ty() ||
+ Ty->getScalarType()->isPPC_FP128Ty()))
+ return VRRC;
+ else if (Ty && Ty->getScalarType()->isHalfTy())
+ return VSXRC;
else
return GPRRC;
}
@@ -646,9 +638,10 @@ unsigned PPCTTIImpl::getCacheLineSize() const {
return CacheLineSize;
// On P7, P8 or P9 we have a cache line size of 128.
- unsigned Directive = ST->getDarwinDirective();
+ unsigned Directive = ST->getCPUDirective();
+  // Assume that the future CPU has the same cache line size as the others.
if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
- Directive == PPC::DIR_PWR9)
+ Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE)
return 128;
// On other processors return a default of 64 bytes.
@@ -662,7 +655,7 @@ unsigned PPCTTIImpl::getPrefetchDistance() const {
}
unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
- unsigned Directive = ST->getDarwinDirective();
+ unsigned Directive = ST->getCPUDirective();
// The 440 has no SIMD support, but floating-point instructions
// have a 5-cycle latency, so unroll by 5x for latency hiding.
if (Directive == PPC::DIR_440)
@@ -680,8 +673,9 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
// For P7 and P8, floating-point instructions have a 6-cycle latency and
// there are two execution units, so unroll by 12x for latency hiding.
// FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
+  // Assume that the future CPU behaves the same as the others here.
if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
- Directive == PPC::DIR_PWR9)
+ Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE)
return 12;
// For most things, modern systems have two execution units (and
@@ -716,10 +710,13 @@ int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1,
return Cost * 2;
}
-int PPCTTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
- TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
+int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Op1Info,
+ TTI::OperandValueKind Op2Info,
+ TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args,
+ const Instruction *CxtI) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
// Fallback to the default implementation.
@@ -829,8 +826,9 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return Cost;
}
-int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace, const Instruction *I) {
+int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ MaybeAlign Alignment, unsigned AddressSpace,
+ const Instruction *I) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
@@ -888,7 +886,8 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
// to be decomposed based on the alignment factor.
// Add the cost of each scalar load or store.
- Cost += LT.first*(SrcBytes/Alignment-1);
+ assert(Alignment);
+ Cost += LT.first * ((SrcBytes / Alignment->value()) - 1);
// For a vector type, there is also scalarization overhead (only for
// stores, loads are expanded using the vector-load + permutation sequence,
@@ -919,7 +918,8 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
// Firstly, the cost of load/store operation.
- int Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
+ int Cost =
+ getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
// PPC, for both Altivec/VSX and QPX, supports cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant
@@ -931,6 +931,20 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
return Cost;
}
+unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
+ return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
+}
+
+unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type*> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) {
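+  // Power9 has vector byte-reverse instructions (e.g. XXBRW/XXBRD), so a
+  // bswap costs no more than the legalization of its type.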
+ if (ID == Intrinsic::bswap && ST->hasP9Vector())
+ return TLI->getTypeLegalizationCost(DL, RetTy).first;
+ return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
+ ScalarizationCostPassed);
+}
+
bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT,
AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 83a70364bf68..35388d14f606 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -46,9 +46,10 @@ public:
using BaseT::getIntImmCost;
int getIntImmCost(const APInt &Imm, Type *Ty);
- int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
- int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
@@ -90,14 +91,15 @@ public:
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
- ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr);
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
@@ -106,6 +108,11 @@ public:
unsigned AddressSpace,
bool UseMaskForCond = false,
bool UseMaskForGaps = false);
+ unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF);
+ unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type*> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = UINT_MAX);
/// @}
};
diff --git a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 5e150be544ed..3e6d1c7939f1 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 99b5dec74668..649bd648a6cf 100644
--- a/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -23,7 +23,7 @@ Target &llvm::getThePPC64LETarget() {
return ThePPC64LETarget;
}
-extern "C" void LLVMInitializePowerPCTargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetInfo() {
RegisterTarget<Triple::ppc, /*HasJIT=*/true> X(getThePPC32Target(), "ppc32",
"PowerPC 32", "PPC");