aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/PowerPC
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/PowerPC')
-rw-r--r--llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp257
-rw-r--r--llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp38
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp53
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h40
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp92
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp20
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.h28
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp27
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h39
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td15
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp10
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp24
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp118
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h7
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h3
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp163
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h86
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp5
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h1
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp63
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h12
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp26
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h13
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp11
-rw-r--r--llvm/lib/Target/PowerPC/P9InstrResources.td2
-rw-r--r--llvm/lib/Target/PowerPC/PPC.h63
-rw-r--r--llvm/lib/Target/PowerPC/PPC.td56
-rw-r--r--llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp668
-rw-r--r--llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp26
-rw-r--r--llvm/lib/Target/PowerPC/PPCCCState.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/PPCCTRLoops.cpp92
-rw-r--r--llvm/lib/Target/PowerPC/PPCCallingConv.td28
-rw-r--r--llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp26
-rw-r--r--llvm/lib/Target/PowerPC/PPCFastISel.cpp19
-rw-r--r--llvm/lib/Target/PowerPC/PPCFrameLowering.cpp498
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp835
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp3647
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h126
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstr64Bit.td112
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrAltivec.td170
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrFormats.td55
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrHTM.td5
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.cpp1073
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.h126
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.td450
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrPrefix.td1735
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrQPX.td1212
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrSPE.td10
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrVSX.td375
-rw-r--r--llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp40
-rw-r--r--llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp6
-rw-r--r--llvm/lib/Target/PowerPC/PPCMCInstLower.cpp14
-rw-r--r--llvm/lib/Target/PowerPC/PPCMIPeephole.cpp280
-rw-r--r--llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp34
-rw-r--r--llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h37
-rw-r--r--llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp140
-rw-r--r--llvm/lib/Target/PowerPC/PPCMacroFusion.cpp4
-rw-r--r--llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp240
-rw-r--r--llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp161
-rw-r--r--llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp4
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp207
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.h27
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.td106
-rw-r--r--llvm/lib/Target/PowerPC/PPCScheduleP9.td11
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.cpp56
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.h44
-rw-r--r--llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp49
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetMachine.cpp111
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetMachine.h5
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp373
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h11
-rw-r--r--llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp6
-rw-r--r--llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp30
-rw-r--r--llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp9
-rw-r--r--llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h1
75 files changed, 8653 insertions, 5915 deletions
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 13fd7d05ab9f..197fd3c7aa74 100644
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -11,7 +11,6 @@
#include "PPCTargetStreamer.h"
#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -99,12 +98,10 @@ struct PPCOperand;
class PPCAsmParser : public MCTargetAsmParser {
bool IsPPC64;
- bool IsDarwin;
void Warning(SMLoc L, const Twine &Msg) { getParser().Warning(L, Msg); }
bool isPPC64() const { return IsPPC64; }
- bool isDarwin() const { return IsDarwin; }
bool MatchRegisterName(unsigned &RegNo, int64_t &IntVal);
@@ -116,14 +113,12 @@ class PPCAsmParser : public MCTargetAsmParser {
PPCMCExpr::VariantKind &Variant);
const MCExpr *FixupVariantKind(const MCExpr *E);
bool ParseExpression(const MCExpr *&EVal);
- bool ParseDarwinExpression(const MCExpr *&EVal);
bool ParseOperand(OperandVector &Operands);
bool ParseDirectiveWord(unsigned Size, AsmToken ID);
bool ParseDirectiveTC(unsigned Size, AsmToken ID);
bool ParseDirectiveMachine(SMLoc L);
- bool ParseDarwinDirectiveMachine(SMLoc L);
bool ParseDirectiveAbiVersion(SMLoc L);
bool ParseDirectiveLocalEntry(SMLoc L);
@@ -150,7 +145,6 @@ public:
// Check for 64-bit vs. 32-bit pointer mode.
const Triple &TheTriple = STI.getTargetTriple();
IsPPC64 = TheTriple.isPPC64();
- IsDarwin = TheTriple.isMacOSX();
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
@@ -290,6 +284,16 @@ public:
return (unsigned) Imm.Val;
}
+ unsigned getACCReg() const {
+ assert(isACCRegNumber() && "Invalid access!");
+ return (unsigned) Imm.Val;
+ }
+
+ unsigned getVSRpEvenReg() const {
+ assert(isVSRpEvenRegNumber() && "Invalid access!");
+ return (unsigned) Imm.Val >> 1;
+ }
+
unsigned getCCReg() const {
assert(isCCRegNumber() && "Invalid access!");
return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal);
@@ -402,6 +406,12 @@ public:
(getImm() & 3) == 0); }
bool isImmZero() const { return Kind == Immediate && getImm() == 0; }
bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); }
+ bool isACCRegNumber() const {
+ return Kind == Immediate && isUInt<3>(getImm());
+ }
+ bool isVSRpEvenRegNumber() const {
+ return Kind == Immediate && isUInt<6>(getImm()) && ((getImm() & 1) == 0);
+ }
bool isVSRegNumber() const {
return Kind == Immediate && isUInt<6>(getImm());
}
@@ -492,29 +502,29 @@ public:
Inst.addOperand(MCOperand::createReg(VSSRegs[getVSReg()]));
}
- void addRegQFRCOperands(MCInst &Inst, unsigned N) const {
+ void addRegSPE4RCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(RRegs[getReg()]));
}
- void addRegQSRCOperands(MCInst &Inst, unsigned N) const {
+ void addRegSPERCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(SPERegs[getReg()]));
}
- void addRegQBRCOperands(MCInst &Inst, unsigned N) const {
+ void addRegACCRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(ACCRegs[getACCReg()]));
}
- void addRegSPE4RCOperands(MCInst &Inst, unsigned N) const {
+ void addRegVSRpRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createReg(RRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(VSRpRegs[getVSRpEvenReg()]));
}
- void addRegSPERCOperands(MCInst &Inst, unsigned N) const {
+ void addRegVSRpEvenRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createReg(SPERegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(VSRpRegs[getVSRpEvenReg()]));
}
void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
@@ -666,7 +676,8 @@ public:
return CreateImm(CE->getValue(), S, E, IsPPC64);
if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Val))
- if (SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS)
+ if (SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS ||
+ SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS_PCREL)
return CreateTLSReg(SRE, S, E, IsPPC64);
if (const PPCMCExpr *TE = dyn_cast<PPCMCExpr>(Val)) {
@@ -762,12 +773,18 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
}
case PPC::DCBFx:
case PPC::DCBFL:
- case PPC::DCBFLP: {
+ case PPC::DCBFLP:
+ case PPC::DCBFPS:
+ case PPC::DCBSTPS: {
int L = 0;
if (Opcode == PPC::DCBFL)
L = 1;
else if (Opcode == PPC::DCBFLP)
L = 3;
+ else if (Opcode == PPC::DCBFPS)
+ L = 4;
+ else if (Opcode == PPC::DCBSTPS)
+ L = 6;
MCInst TmpInst;
TmpInst.setOpcode(PPC::DCBF);
@@ -1184,41 +1201,41 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
bool PPCAsmParser::MatchRegisterName(unsigned &RegNo, int64_t &IntVal) {
- if (getParser().getTok().is(AsmToken::Identifier)) {
- StringRef Name = getParser().getTok().getString();
- if (Name.equals_lower("lr")) {
- RegNo = isPPC64()? PPC::LR8 : PPC::LR;
- IntVal = 8;
- } else if (Name.equals_lower("ctr")) {
- RegNo = isPPC64()? PPC::CTR8 : PPC::CTR;
- IntVal = 9;
- } else if (Name.equals_lower("vrsave")) {
- RegNo = PPC::VRSAVE;
- IntVal = 256;
- } else if (Name.startswith_lower("r") &&
- !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
- RegNo = isPPC64()? XRegs[IntVal] : RRegs[IntVal];
- } else if (Name.startswith_lower("f") &&
- !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
- RegNo = FRegs[IntVal];
- } else if (Name.startswith_lower("vs") &&
- !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 64) {
- RegNo = VSRegs[IntVal];
- } else if (Name.startswith_lower("v") &&
- !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
- RegNo = VRegs[IntVal];
- } else if (Name.startswith_lower("q") &&
- !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
- RegNo = QFRegs[IntVal];
- } else if (Name.startswith_lower("cr") &&
- !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
- RegNo = CRRegs[IntVal];
- } else
- return true;
- getParser().Lex();
- return false;
- }
- return true;
+ if (getParser().getTok().is(AsmToken::Percent))
+ getParser().Lex(); // Eat the '%'.
+
+ if (!getParser().getTok().is(AsmToken::Identifier))
+ return true;
+
+ StringRef Name = getParser().getTok().getString();
+ if (Name.equals_lower("lr")) {
+ RegNo = isPPC64() ? PPC::LR8 : PPC::LR;
+ IntVal = 8;
+ } else if (Name.equals_lower("ctr")) {
+ RegNo = isPPC64() ? PPC::CTR8 : PPC::CTR;
+ IntVal = 9;
+ } else if (Name.equals_lower("vrsave")) {
+ RegNo = PPC::VRSAVE;
+ IntVal = 256;
+ } else if (Name.startswith_lower("r") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = isPPC64() ? XRegs[IntVal] : RRegs[IntVal];
+ } else if (Name.startswith_lower("f") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = FRegs[IntVal];
+ } else if (Name.startswith_lower("vs") &&
+ !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 64) {
+ RegNo = VSRegs[IntVal];
+ } else if (Name.startswith_lower("v") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = VRegs[IntVal];
+ } else if (Name.startswith_lower("cr") &&
+ !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
+ RegNo = CRRegs[IntVal];
+ } else
+ return true;
+ getParser().Lex();
+ return false;
}
bool PPCAsmParser::
@@ -1387,10 +1404,6 @@ FixupVariantKind(const MCExpr *E) {
/// it handles modifiers.
bool PPCAsmParser::
ParseExpression(const MCExpr *&EVal) {
-
- if (isDarwin())
- return ParseDarwinExpression(EVal);
-
// (ELF Platforms)
// Handle \code @l/@ha \endcode
if (getParser().parseExpression(EVal))
@@ -1406,53 +1419,6 @@ ParseExpression(const MCExpr *&EVal) {
return false;
}
-/// ParseDarwinExpression. (MachO Platforms)
-/// This differs from the default "parseExpression" in that it handles detection
-/// of the \code hi16(), ha16() and lo16() \endcode modifiers. At present,
-/// parseExpression() doesn't recognise the modifiers when in the Darwin/MachO
-/// syntax form so it is done here. TODO: Determine if there is merit in
-/// arranging for this to be done at a higher level.
-bool PPCAsmParser::
-ParseDarwinExpression(const MCExpr *&EVal) {
- MCAsmParser &Parser = getParser();
- PPCMCExpr::VariantKind Variant = PPCMCExpr::VK_PPC_None;
- switch (getLexer().getKind()) {
- default:
- break;
- case AsmToken::Identifier:
- // Compiler-generated Darwin identifiers begin with L,l,_ or "; thus
- // something starting with any other char should be part of the
- // asm syntax. If handwritten asm includes an identifier like lo16,
- // then all bets are off - but no-one would do that, right?
- StringRef poss = Parser.getTok().getString();
- if (poss.equals_lower("lo16")) {
- Variant = PPCMCExpr::VK_PPC_LO;
- } else if (poss.equals_lower("hi16")) {
- Variant = PPCMCExpr::VK_PPC_HI;
- } else if (poss.equals_lower("ha16")) {
- Variant = PPCMCExpr::VK_PPC_HA;
- }
- if (Variant != PPCMCExpr::VK_PPC_None) {
- Parser.Lex(); // Eat the xx16
- if (getLexer().isNot(AsmToken::LParen))
- return Error(Parser.getTok().getLoc(), "expected '('");
- Parser.Lex(); // Eat the '('
- }
- break;
- }
-
- if (getParser().parseExpression(EVal))
- return true;
-
- if (Variant != PPCMCExpr::VK_PPC_None) {
- if (getLexer().isNot(AsmToken::RParen))
- return Error(Parser.getTok().getLoc(), "expected ')'");
- Parser.Lex(); // Eat the ')'
- EVal = PPCMCExpr::create(Variant, EVal, getParser().getContext());
- }
- return false;
-}
-
/// ParseOperand
/// This handles registers in the form 'NN', '%rNN' for ELF platforms and
/// rNN for MachO.
@@ -1466,8 +1432,7 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
switch (getLexer().getKind()) {
// Special handling for register names. These are interpreted
// as immediates corresponding to the register number.
- case AsmToken::Percent:
- Parser.Lex(); // Eat the '%'.
+ case AsmToken::Percent: {
unsigned RegNo;
int64_t IntVal;
if (MatchRegisterName(RegNo, IntVal))
@@ -1475,7 +1440,7 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
return false;
-
+ }
case AsmToken::Identifier:
case AsmToken::LParen:
case AsmToken::Plus:
@@ -1485,20 +1450,6 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
case AsmToken::Dollar:
case AsmToken::Exclaim:
case AsmToken::Tilde:
- // Note that non-register-name identifiers from the compiler will begin
- // with '_', 'L'/'l' or '"'. Of course, handwritten asm could include
- // identifiers like r31foo - so we fall through in the event that parsing
- // a register name fails.
- if (isDarwin()) {
- unsigned RegNo;
- int64_t IntVal;
- if (!MatchRegisterName(RegNo, IntVal)) {
- Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
- return false;
- }
- }
- // All other expressions
-
if (!ParseExpression(EVal))
break;
// Fall-through
@@ -1537,29 +1488,18 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
int64_t IntVal;
switch (getLexer().getKind()) {
- case AsmToken::Percent:
- Parser.Lex(); // Eat the '%'.
+ case AsmToken::Percent: {
unsigned RegNo;
if (MatchRegisterName(RegNo, IntVal))
return Error(S, "invalid register name");
break;
-
+ }
case AsmToken::Integer:
- if (isDarwin())
- return Error(S, "unexpected integer value");
- else if (getParser().parseAbsoluteExpression(IntVal) || IntVal < 0 ||
- IntVal > 31)
+ if (getParser().parseAbsoluteExpression(IntVal) || IntVal < 0 ||
+ IntVal > 31)
return Error(S, "invalid register number");
break;
- case AsmToken::Identifier:
- if (isDarwin()) {
- unsigned RegNo;
- if (!MatchRegisterName(RegNo, IntVal)) {
- break;
- }
- }
- LLVM_FALLTHROUGH;
-
+ case AsmToken::Identifier:
default:
return Error(S, "invalid memory operand");
}
@@ -1643,12 +1583,7 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
/// ParseDirective parses the PPC specific directives
bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
- if (isDarwin()) {
- if (IDVal == ".machine")
- ParseDarwinDirectiveMachine(DirectiveID.getLoc());
- else
- return true;
- } else if (IDVal == ".word")
+ if (IDVal == ".word")
ParseDirectiveWord(2, DirectiveID);
else if (IDVal == ".llong")
ParseDirectiveWord(8, DirectiveID);
@@ -1720,11 +1655,7 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
// FIXME: Right now, the parser always allows any available
// instruction, so the .machine directive is not useful.
- // Implement ".machine any" (by doing nothing) for the benefit
- // of existing assembler code. Likewise, we can then implement
- // ".machine push" and ".machine pop" as no-op.
- if (CPU != "any" && CPU != "push" && CPU != "pop")
- return TokError("unrecognized machine type");
+ // In the wild, any/push/pop/ppc64/altivec/power[4-9] are seen.
Parser.Lex();
@@ -1739,31 +1670,6 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
return false;
}
-/// ParseDarwinDirectiveMachine (Mach-o platforms)
-/// ::= .machine cpu-identifier
-bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) {
- MCAsmParser &Parser = getParser();
- if (Parser.getTok().isNot(AsmToken::Identifier) &&
- Parser.getTok().isNot(AsmToken::String))
- return Error(L, "unexpected token in directive");
-
- StringRef CPU = Parser.getTok().getIdentifier();
- Parser.Lex();
-
- // FIXME: this is only the 'default' set of cpu variants.
- // However we don't act on this information at present, this is simply
- // allowing parsing to proceed with minimal sanity checking.
- if (check(CPU != "ppc7400" && CPU != "ppc" && CPU != "ppc64", L,
- "unrecognized cpu type") ||
- check(isPPC64() && (CPU == "ppc7400" || CPU == "ppc"), L,
- "wrong cpu type specified for 64bit") ||
- check(!isPPC64() && CPU == "ppc64", L,
- "wrong cpu type specified for 32bit") ||
- parseToken(AsmToken::EndOfStatement))
- return addErrorSuffix(" in '.machine' directive");
- return false;
-}
-
/// ParseDirectiveAbiVersion
/// ::= .abiversion constant-expression
bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) {
@@ -1809,8 +1715,9 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCAsmParser() {
RegisterMCAsmParser<PPCAsmParser> A(getThePPC32Target());
- RegisterMCAsmParser<PPCAsmParser> B(getThePPC64Target());
- RegisterMCAsmParser<PPCAsmParser> C(getThePPC64LETarget());
+ RegisterMCAsmParser<PPCAsmParser> B(getThePPC32LETarget());
+ RegisterMCAsmParser<PPCAsmParser> C(getThePPC64Target());
+ RegisterMCAsmParser<PPCAsmParser> D(getThePPC64LETarget());
}
#define GET_REGISTER_MATCHER
diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 74c6fd3733f0..3e9286fb0b30 100644
--- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -54,6 +54,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCDisassembler() {
// Register the disassembler for each target.
TargetRegistry::RegisterMCDisassembler(getThePPC32Target(),
createPPCDisassembler);
+ TargetRegistry::RegisterMCDisassembler(getThePPC32LETarget(),
+ createPPCLEDisassembler);
TargetRegistry::RegisterMCDisassembler(getThePPC64Target(),
createPPCDisassembler);
TargetRegistry::RegisterMCDisassembler(getThePPC64LETarget(),
@@ -167,18 +169,24 @@ static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo,
#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
-static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
- uint64_t Address,
- const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, QFRegs);
-}
-
static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
return decodeRegisterClass(Inst, RegNo, SPERegs);
}
+static DecodeStatus DecodeACCRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, ACCRegs);
+}
+
+static DecodeStatus DecodeVSRpRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VSRpRegs);
+}
+
#define DecodeQSRCRegisterClass DecodeQFRCRegisterClass
#define DecodeQBRCRegisterClass DecodeQFRCRegisterClass
@@ -206,6 +214,15 @@ static DecodeStatus decodeImmZeroOperand(MCInst &Inst, uint64_t Imm,
return MCDisassembler::Success;
}
+static DecodeStatus decodeVSRpEvenOperands(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo & 1)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(VSRpRegs[RegNo >> 1]));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm,
int64_t Address, const void *Decoder) {
// Decode the memri field (imm, reg), which has the low 16-bits as the
@@ -401,14 +418,9 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// Read the instruction in the proper endianness.
uint64_t Inst = ReadFunc(Bytes.data());
- if (STI.getFeatureBits()[PPC::FeatureQPX]) {
- DecodeStatus result =
- decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI);
- if (result != MCDisassembler::Fail)
- return result;
- } else if (STI.getFeatureBits()[PPC::FeatureSPE]) {
+ if (STI.getFeatureBits()[PPC::FeatureSPE]) {
DecodeStatus result =
- decodeInstruction(DecoderTableSPE32, MI, Inst, Address, this, STI);
+ decodeInstruction(DecoderTableSPE32, MI, Inst, Address, this, STI);
if (result != MCDisassembler::Fail)
return result;
}
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
new file mode 100644
index 000000000000..e8f8cbfee6ee
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
@@ -0,0 +1,53 @@
+//===-- PPCCallLowering.h - Call lowering for GlobalISel -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the lowering of LLVM calls to machine code calls for
+/// GlobalISel.
+///
+//===----------------------------------------------------------------------===//
+
+#include "PPCCallLowering.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "ppc-call-lowering"
+
+using namespace llvm;
+
+PPCCallLowering::PPCCallLowering(const PPCTargetLowering &TLI)
+ : CallLowering(&TLI) {}
+
+bool PPCCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
+ const Value *Val, ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI,
+ Register SwiftErrorVReg) const {
+ assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
+ "Return value without a vreg");
+ if (VRegs.size() > 0)
+ return false;
+
+ MIRBuilder.buildInstr(PPC::BLR8);
+ return true;
+}
+
+bool PPCCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
+ const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const {
+
+ // If VRegs is empty, then there are no formal arguments to lower and thus can
+ // always return true. If there are formal arguments, we currently do not
+ // handle them and thus return false.
+ return VRegs.empty();
+}
+
+bool PPCCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const {
+ return false;
+}
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
new file mode 100644
index 000000000000..5a449f4cab1b
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
@@ -0,0 +1,40 @@
+//===-- PPCCallLowering.h - Call lowering for GlobalISel -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file describes how to lower LLVM calls to machine code calls.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_GISEL_PPCCALLLOWERING_H
+#define LLVM_LIB_TARGET_POWERPC_GISEL_PPCCALLLOWERING_H
+
+#include "PPCISelLowering.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/IR/CallingConv.h"
+
+namespace llvm {
+
+class PPCTargetLowering;
+
+class PPCCallLowering : public CallLowering {
+public:
+ PPCCallLowering(const PPCTargetLowering &TLI);
+
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI,
+ Register SwiftErrorVReg) const override;
+ bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const override;
+ bool lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const override;
+};
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
new file mode 100644
index 000000000000..7d64816ed6c7
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
@@ -0,0 +1,92 @@
+//===- PPCInstructionSelector.cpp --------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the InstructionSelector class for
+/// PowerPC.
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPCRegisterBankInfo.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "ppc-gisel"
+
+using namespace llvm;
+
+namespace {
+
+#define GET_GLOBALISEL_PREDICATE_BITSET
+#include "PPCGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATE_BITSET
+
+class PPCInstructionSelector : public InstructionSelector {
+public:
+ PPCInstructionSelector(const PPCTargetMachine &TM, const PPCSubtarget &STI,
+ const PPCRegisterBankInfo &RBI);
+
+ bool select(MachineInstr &I) override;
+ static const char *getName() { return DEBUG_TYPE; }
+
+private:
+ /// tblgen generated 'select' implementation that is used as the initial
+ /// selector for the patterns that do not require complex C++.
+ bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
+
+ const PPCInstrInfo &TII;
+ const PPCRegisterInfo &TRI;
+ const PPCRegisterBankInfo &RBI;
+
+#define GET_GLOBALISEL_PREDICATES_DECL
+#include "PPCGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_DECL
+
+#define GET_GLOBALISEL_TEMPORARIES_DECL
+#include "PPCGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_DECL
+};
+
+} // end anonymous namespace
+
+#define GET_GLOBALISEL_IMPL
+#include "PPCGenGlobalISel.inc"
+#undef GET_GLOBALISEL_IMPL
+
+PPCInstructionSelector::PPCInstructionSelector(const PPCTargetMachine &TM,
+ const PPCSubtarget &STI,
+ const PPCRegisterBankInfo &RBI)
+ : InstructionSelector(), TII(*STI.getInstrInfo()),
+ TRI(*STI.getRegisterInfo()), RBI(RBI),
+#define GET_GLOBALISEL_PREDICATES_INIT
+#include "PPCGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_INIT
+#define GET_GLOBALISEL_TEMPORARIES_INIT
+#include "PPCGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_INIT
+{
+}
+
+bool PPCInstructionSelector::select(MachineInstr &I) {
+ if (selectImpl(I, *CoverageInfo))
+ return true;
+ return false;
+}
+
+namespace llvm {
+InstructionSelector *
+createPPCInstructionSelector(const PPCTargetMachine &TM,
+ const PPCSubtarget &Subtarget,
+ const PPCRegisterBankInfo &RBI) {
+ return new PPCInstructionSelector(TM, Subtarget, RBI);
+}
+} // end namespace llvm
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
new file mode 100644
index 000000000000..c16bcaea592b
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
@@ -0,0 +1,20 @@
+//===- PPCLegalizerInfo.h ----------------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the Machinelegalizer class for PowerPC
+//===----------------------------------------------------------------------===//
+
+#include "PPCLegalizerInfo.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "ppc-legalinfo"
+
+using namespace llvm;
+using namespace LegalizeActions;
+
+PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) { computeTables(); }
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.h b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.h
new file mode 100644
index 000000000000..c73186d3d0c1
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.h
@@ -0,0 +1,28 @@
+//===- PPCLegalizerInfo.h ----------------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the Machinelegalizer class for PowerPC
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_GISEL_PPCMACHINELEGALIZER_H
+#define LLVM_LIB_TARGET_POWERPC_GISEL_PPCMACHINELEGALIZER_H
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+namespace llvm {
+
+class PPCSubtarget;
+
+/// This class provides the information for the PowerPC target legalizer for
+/// GlobalISel.
+class PPCLegalizerInfo : public LegalizerInfo {
+public:
+ PPCLegalizerInfo(const PPCSubtarget &ST);
+};
+} // namespace llvm
+#endif
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
new file mode 100644
index 000000000000..6af79324919c
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
@@ -0,0 +1,27 @@
+//===- PPCRegisterBankInfo.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the RegisterBankInfo class for
+/// PowerPC.
+//===----------------------------------------------------------------------===//
+
+#include "PPCRegisterBankInfo.h"
+#include "PPCRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "ppc-reg-bank-info"
+
+#define GET_TARGET_REGBANK_IMPL
+#include "PPCGenRegisterBank.inc"
+
+using namespace llvm;
+
+PPCRegisterBankInfo::PPCRegisterBankInfo(const TargetRegisterInfo &TRI)
+ : PPCGenRegisterBankInfo() {}
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
new file mode 100644
index 000000000000..358d5ed3cf14
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
@@ -0,0 +1,39 @@
+//===-- PPCRegisterBankInfo.h -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares the targeting of the RegisterBankInfo class for PowerPC.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_PPC_GISEL_PPCREGISTERBANKINFO_H
+#define LLVM_LIB_TARGET_PPC_GISEL_PPCREGISTERBANKINFO_H
+
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+#define GET_REGBANK_DECLARATIONS
+#include "PPCGenRegisterBank.inc"
+
+namespace llvm {
+class TargetRegisterInfo;
+
+class PPCGenRegisterBankInfo : public RegisterBankInfo {
+protected:
+#define GET_TARGET_REGBANK_CLASS
+#include "PPCGenRegisterBank.inc"
+};
+
+class PPCRegisterBankInfo final : public PPCGenRegisterBankInfo {
+public:
+ PPCRegisterBankInfo(const TargetRegisterInfo &TRI);
+};
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td
new file mode 100644
index 000000000000..0e8a4b7061c5
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td
@@ -0,0 +1,15 @@
+//===-- PPCRegisterBanks.td - Describe the PPC Banks -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Define the PPC register banks used for GlobalISel.
+///
+//===----------------------------------------------------------------------===//
+
+/// General Purpose Registers
+def GPRRegBank : RegisterBank<"GPR", [G8RC]>;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index dbaf221db9fc..72401668c8d0 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -46,6 +46,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
case PPC::fixup_ppc_half16ds:
return Value & 0xfffc;
case PPC::fixup_ppc_pcrel34:
+ case PPC::fixup_ppc_imm34:
return Value & 0x3ffffffff;
}
}
@@ -68,6 +69,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case PPC::fixup_ppc_br24_notoc:
return 4;
case PPC::fixup_ppc_pcrel34:
+ case PPC::fixup_ppc_imm34:
case FK_Data_8:
return 8;
case PPC::fixup_ppc_nofixup:
@@ -100,6 +102,7 @@ public:
{ "fixup_ppc_half16", 0, 16, 0 },
{ "fixup_ppc_half16ds", 0, 14, 0 },
{ "fixup_ppc_pcrel34", 0, 34, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_imm34", 0, 34, 0 },
{ "fixup_ppc_nofixup", 0, 0, 0 }
};
const static MCFixupKindInfo InfosLE[PPC::NumTargetFixupKinds] = {
@@ -112,6 +115,7 @@ public:
{ "fixup_ppc_half16", 0, 16, 0 },
{ "fixup_ppc_half16ds", 2, 14, 0 },
{ "fixup_ppc_pcrel34", 0, 34, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_imm34", 0, 34, 0 },
{ "fixup_ppc_nofixup", 0, 0, 0 }
};
@@ -178,12 +182,6 @@ public:
}
}
- bool mayNeedRelaxation(const MCInst &Inst,
- const MCSubtargetInfo &STI) const override {
- // FIXME.
- return false;
- }
-
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
const MCRelaxableFragment *DF,
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index d8b3301e97f1..94ef7b45434f 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -138,6 +138,15 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_GOT_PCREL:
Type = ELF::R_PPC64_GOT_PCREL34;
break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL:
+ Type = ELF::R_PPC64_GOT_TLSGD_PCREL34;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL:
+ Type = ELF::R_PPC64_GOT_TLSLD_PCREL34;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL:
+ Type = ELF::R_PPC64_GOT_TPREL_PCREL34;
+ break;
}
break;
case FK_Data_4:
@@ -407,6 +416,21 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
else
Type = ELF::R_PPC_TLS;
break;
+ case MCSymbolRefExpr::VK_PPC_TLS_PCREL:
+ Type = ELF::R_PPC64_TLS;
+ break;
+ }
+ break;
+ case PPC::fixup_ppc_imm34:
+ switch (Modifier) {
+ default:
+ report_fatal_error("Unsupported Modifier for fixup_ppc_imm34.");
+ case MCSymbolRefExpr::VK_DTPREL:
+ Type = ELF::R_PPC64_DTPREL34;
+ break;
+ case MCSymbolRefExpr::VK_TPREL:
+ Type = ELF::R_PPC64_TPREL34;
+ break;
}
break;
case FK_Data_8:
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
index 4373778cc96c..386d59266096 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
@@ -20,6 +20,7 @@
#include "PPCELFStreamer.h"
+#include "PPCFixupKinds.h"
#include "PPCInstrInfo.h"
#include "PPCMCCodeEmitter.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -89,12 +90,33 @@ void PPCELFStreamer::emitInstruction(const MCInst &Inst,
PPCMCCodeEmitter *Emitter =
static_cast<PPCMCCodeEmitter*>(getAssembler().getEmitterPtr());
+ // If the instruction is a part of the GOT to PC-Rel link time optimization
+ // instruction pair, return a value, otherwise return None. A true returned
+ // value means the instruction is the PLDpc and a false value means it is
+ // the user instruction.
+ Optional<bool> IsPartOfGOTToPCRelPair = isPartOfGOTToPCRelPair(Inst, STI);
+
+ // User of the GOT-indirect address.
+ // For example, the load that will get the relocation as follows:
+ // .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
+ // lwa 3, 4(3)
+ if (IsPartOfGOTToPCRelPair.hasValue() && !IsPartOfGOTToPCRelPair.getValue())
+ emitGOTToPCRelReloc(Inst);
+
// Special handling is only for prefixed instructions.
if (!Emitter->isPrefixedInstruction(Inst)) {
MCELFStreamer::emitInstruction(Inst, STI);
return;
}
emitPrefixedInstruction(Inst, STI);
+
+ // Producer of the GOT-indirect address.
+ // For example, the prefixed load from the got that will get the label as
+ // follows:
+ // pld 3, vec@got@pcrel(0), 1
+ // .Lpcrel1:
+ if (IsPartOfGOTToPCRelPair.hasValue() && IsPartOfGOTToPCRelPair.getValue())
+ emitGOTToPCRelLabel(Inst);
}
void PPCELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
@@ -103,6 +125,102 @@ void PPCELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
MCELFStreamer::emitLabel(Symbol);
}
+// This linker time GOT PC Relative optimization relocation will look like this:
+// pld <reg> symbol@got@pcrel
+// <Label###>:
+// .reloc Label###-8,R_PPC64_PCREL_OPT,.-(Label###-8)
+// load <loadedreg>, 0(<reg>)
+// The reason we place the label after the PLDpc instruction is that there
+// may be an alignment nop before it since prefixed instructions must not
+// cross a 64-byte boundary (please see
+// PPCELFStreamer::emitPrefixedInstruction()). When referring to the
+// label, we subtract the width of a prefixed instruction (8 bytes) to ensure
+// we refer to the PLDpc.
+void PPCELFStreamer::emitGOTToPCRelReloc(const MCInst &Inst) {
+ // Get the last operand which contains the symbol.
+ const MCOperand &Operand = Inst.getOperand(Inst.getNumOperands() - 1);
+ assert(Operand.isExpr() && "Expecting an MCExpr.");
+ // Cast the last operand to MCSymbolRefExpr to get the symbol.
+ const MCExpr *Expr = Operand.getExpr();
+ const MCSymbolRefExpr *SymExpr = static_cast<const MCSymbolRefExpr *>(Expr);
+ assert(SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT &&
+ "Expecting a symbol of type VK_PPC_PCREL_OPT");
+ MCSymbol *LabelSym =
+ getContext().getOrCreateSymbol(SymExpr->getSymbol().getName());
+ const MCExpr *LabelExpr = MCSymbolRefExpr::create(LabelSym, getContext());
+ const MCExpr *Eight = MCConstantExpr::create(8, getContext());
+ // SubExpr is just Label###-8
+ const MCExpr *SubExpr =
+ MCBinaryExpr::createSub(LabelExpr, Eight, getContext());
+ MCSymbol *CurrentLocation = getContext().createTempSymbol();
+ const MCExpr *CurrentLocationExpr =
+ MCSymbolRefExpr::create(CurrentLocation, getContext());
+ // SubExpr2 is .-(Label###-8)
+ const MCExpr *SubExpr2 =
+ MCBinaryExpr::createSub(CurrentLocationExpr, SubExpr, getContext());
+
+ MCDataFragment *DF = static_cast<MCDataFragment *>(LabelSym->getFragment());
+ assert(DF && "Expecting a valid data fragment.");
+ MCFixupKind FixupKind = static_cast<MCFixupKind>(FirstLiteralRelocationKind +
+ ELF::R_PPC64_PCREL_OPT);
+ DF->getFixups().push_back(
+ MCFixup::create(LabelSym->getOffset() - 8, SubExpr2,
+ FixupKind, Inst.getLoc()));
+ emitLabel(CurrentLocation, Inst.getLoc());
+}
+
+// Emit the label that immediately follows the PLDpc for a link time GOT PC Rel
+// optimization.
+void PPCELFStreamer::emitGOTToPCRelLabel(const MCInst &Inst) {
+ // Get the last operand which contains the symbol.
+ const MCOperand &Operand = Inst.getOperand(Inst.getNumOperands() - 1);
+ assert(Operand.isExpr() && "Expecting an MCExpr.");
+ // Cast the last operand to MCSymbolRefExpr to get the symbol.
+ const MCExpr *Expr = Operand.getExpr();
+ const MCSymbolRefExpr *SymExpr = static_cast<const MCSymbolRefExpr *>(Expr);
+ assert(SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT &&
+ "Expecting a symbol of type VK_PPC_PCREL_OPT");
+ MCSymbol *LabelSym =
+ getContext().getOrCreateSymbol(SymExpr->getSymbol().getName());
+ emitLabel(LabelSym, Inst.getLoc());
+}
+
+// This funciton checks if the parameter Inst is part of the setup for a link
+// time GOT PC Relative optimization. For example in this situation:
+// <MCInst PLDpc <MCOperand Reg:282> <MCOperand Expr:(glob_double@got@pcrel)>
+// <MCOperand Imm:0> <MCOperand Expr:(.Lpcrel@<<invalid>>)>>
+// <MCInst SOME_LOAD <MCOperand Reg:22> <MCOperand Imm:0> <MCOperand Reg:282>
+// <MCOperand Expr:(.Lpcrel@<<invalid>>)>>
+// The above is a pair of such instructions and this function will not return
+// None for either one of them. In both cases we are looking for the last
+// operand <MCOperand Expr:(.Lpcrel@<<invalid>>)> which needs to be an MCExpr
+// and has the flag MCSymbolRefExpr::VK_PPC_PCREL_OPT. After that we just look
+// at the opcode and in the case of PLDpc we will return true. For the load
+// (or store) this function will return false indicating it has found the second
+// instruciton in the pair.
+Optional<bool> llvm::isPartOfGOTToPCRelPair(const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ // Need at least two operands.
+ if (Inst.getNumOperands() < 2)
+ return None;
+
+ unsigned LastOp = Inst.getNumOperands() - 1;
+ // The last operand needs to be an MCExpr and it needs to have a variant kind
+ // of VK_PPC_PCREL_OPT. If it does not satisfy these conditions it is not a
+ // link time GOT PC Rel opt instruction and we can ignore it and return None.
+ const MCOperand &Operand = Inst.getOperand(LastOp);
+ if (!Operand.isExpr())
+ return None;
+
+ // Check for the variant kind VK_PPC_PCREL_OPT in this expression.
+ const MCExpr *Expr = Operand.getExpr();
+ const MCSymbolRefExpr *SymExpr = static_cast<const MCSymbolRefExpr *>(Expr);
+ if (!SymExpr || SymExpr->getKind() != MCSymbolRefExpr::VK_PPC_PCREL_OPT)
+ return None;
+
+ return (Inst.getOpcode() == PPC::PLDpc);
+}
+
MCELFStreamer *llvm::createPPCELFStreamer(
MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
std::unique_ptr<MCObjectWriter> OW,
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
index 51863232d071..f44200104f32 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
@@ -43,8 +43,15 @@ public:
void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
private:
void emitPrefixedInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);
+ void emitGOTToPCRelReloc(const MCInst &Inst);
+ void emitGOTToPCRelLabel(const MCInst &Inst);
};
+// Check if the instruction Inst is part of a pair of instructions that make up
+// a link time GOT PC Rel optimization.
+Optional<bool> isPartOfGOTToPCRelPair(const MCInst &Inst,
+ const MCSubtargetInfo &STI);
+
MCELFStreamer *createPPCELFStreamer(MCContext &Context,
std::unique_ptr<MCAsmBackend> MAB,
std::unique_ptr<MCObjectWriter> OW,
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 2fb8947fd4e0..73292f7b7938 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -43,6 +43,9 @@ enum Fixups {
// A 34-bit fixup corresponding to PC-relative paddi.
fixup_ppc_pcrel34,
+ // A 34-bit fixup corresponding to Non-PC-relative paddi.
+ fixup_ppc_imm34,
+
/// Not a true fixup, but ties a symbol to a call to __tls_get_addr for the
/// TLS general and local dynamic models, or inserts the thread-pointer
/// register number.
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index 16da62a74b8c..a291a34d4c52 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -49,18 +49,6 @@ FullRegNamesWithPercent("ppc-reg-with-percent-prefix", cl::Hidden,
void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
const char *RegName = getRegisterName(RegNo);
- if (RegName[0] == 'q' /* QPX */) {
- // The system toolchain on the BG/Q does not understand QPX register names
- // in .cfi_* directives, so print the name of the floating-point
- // subregister instead.
- std::string RN(RegName);
-
- RN[0] = 'f';
- OS << RN;
-
- return;
- }
-
OS << RegName;
}
@@ -83,15 +71,45 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
"reference expression if it is an expression at all.");
O << "\taddis ";
- printOperand(MI, 0, O);
+ printOperand(MI, 0, STI, O);
O << ", ";
- printOperand(MI, 2, O);
+ printOperand(MI, 2, STI, O);
O << "(";
- printOperand(MI, 1, O);
+ printOperand(MI, 1, STI, O);
O << ")";
return;
}
+ // Check if the last operand is an expression with the variant kind
+ // VK_PPC_PCREL_OPT. If this is the case then this is a linker optimization
+ // relocation and the .reloc directive needs to be added.
+ unsigned LastOp = MI->getNumOperands() - 1;
+ if (MI->getNumOperands() > 1) {
+ const MCOperand &Operand = MI->getOperand(LastOp);
+ if (Operand.isExpr()) {
+ const MCExpr *Expr = Operand.getExpr();
+ const MCSymbolRefExpr *SymExpr =
+ static_cast<const MCSymbolRefExpr *>(Expr);
+
+ if (SymExpr && SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT) {
+ const MCSymbol &Symbol = SymExpr->getSymbol();
+ if (MI->getOpcode() == PPC::PLDpc) {
+ printInstruction(MI, Address, STI, O);
+ O << "\n";
+ Symbol.print(O, &MAI);
+ O << ":";
+ return;
+ } else {
+ O << "\t.reloc ";
+ Symbol.print(O, &MAI);
+ O << "-8,R_PPC64_PCREL_OPT,.-(";
+ Symbol.print(O, &MAI);
+ O << "-8)\n";
+ }
+ }
+ }
+ }
+
// Check for slwi/srwi mnemonics.
if (MI->getOpcode() == PPC::RLWINM) {
unsigned char SH = MI->getOperand(2).getImm();
@@ -106,9 +124,9 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
SH = 32-SH;
}
if (useSubstituteMnemonic) {
- printOperand(MI, 0, O);
+ printOperand(MI, 0, STI, O);
O << ", ";
- printOperand(MI, 1, O);
+ printOperand(MI, 1, STI, O);
O << ", " << (unsigned int)SH;
printAnnotation(O, Annot);
@@ -123,9 +141,9 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
// rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
if (63-SH == ME) {
O << "\tsldi ";
- printOperand(MI, 0, O);
+ printOperand(MI, 0, STI, O);
O << ", ";
- printOperand(MI, 1, O);
+ printOperand(MI, 1, STI, O);
O << ", " << (unsigned int)SH;
printAnnotation(O, Annot);
return;
@@ -153,9 +171,9 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
if (IsBookE && TH != 0 && TH != 16)
O << (unsigned int) TH << ", ";
- printOperand(MI, 1, O);
+ printOperand(MI, 1, STI, O);
O << ", ";
- printOperand(MI, 2, O);
+ printOperand(MI, 2, STI, O);
if (!IsBookE && TH != 0 && TH != 16)
O << ", " << (unsigned int) TH;
@@ -166,29 +184,36 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
if (MI->getOpcode() == PPC::DCBF) {
unsigned char L = MI->getOperand(0).getImm();
- if (!L || L == 1 || L == 3) {
- O << "\tdcbf";
- if (L == 1 || L == 3)
+ if (!L || L == 1 || L == 3 || L == 4 || L == 6) {
+ O << "\tdcb";
+ if (L != 6)
+ O << "f";
+ if (L == 1)
O << "l";
if (L == 3)
- O << "p";
+ O << "lp";
+ if (L == 4)
+ O << "ps";
+ if (L == 6)
+ O << "stps";
O << " ";
- printOperand(MI, 1, O);
+ printOperand(MI, 1, STI, O);
O << ", ";
- printOperand(MI, 2, O);
+ printOperand(MI, 2, STI, O);
printAnnotation(O, Annot);
return;
}
}
- if (!printAliasInstr(MI, Address, O))
- printInstruction(MI, Address, O);
+ if (!printAliasInstr(MI, Address, STI, O))
+ printInstruction(MI, Address, STI, O);
printAnnotation(O, Annot);
}
void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O,
const char *Modifier) {
unsigned Code = MI->getOperand(OpNo).getImm();
@@ -282,10 +307,11 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
assert(StringRef(Modifier) == "reg" &&
"Need to specify 'cc', 'pm' or 'reg' as predicate op modifier!");
- printOperand(MI, OpNo+1, O);
+ printOperand(MI, OpNo + 1, STI, O);
}
void PPCInstPrinter::printATBitsAsHint(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Code = MI->getOperand(OpNo).getImm();
if (Code == 2)
@@ -295,6 +321,7 @@ void PPCInstPrinter::printATBitsAsHint(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value <= 1 && "Invalid u1imm argument!");
@@ -302,6 +329,7 @@ void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value <= 3 && "Invalid u2imm argument!");
@@ -309,6 +337,7 @@ void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value <= 8 && "Invalid u3imm argument!");
@@ -316,6 +345,7 @@ void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value <= 15 && "Invalid u4imm argument!");
@@ -323,6 +353,7 @@ void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
int Value = MI->getOperand(OpNo).getImm();
Value = SignExtend32<5>(Value);
@@ -330,6 +361,7 @@ void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printImmZeroOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value == 0 && "Operand must be zero");
@@ -337,6 +369,7 @@ void PPCInstPrinter::printImmZeroOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value <= 31 && "Invalid u5imm argument!");
@@ -344,6 +377,7 @@ void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value <= 63 && "Invalid u6imm argument!");
@@ -351,6 +385,7 @@ void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printU7ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value <= 127 && "Invalid u7imm argument!");
@@ -361,12 +396,14 @@ void PPCInstPrinter::printU7ImmOperand(const MCInst *MI, unsigned OpNo,
// of XXSPLTIB which are unsigned. So we simply truncate to 8 bits and
// print as unsigned.
void PPCInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned char Value = MI->getOperand(OpNo).getImm();
O << (unsigned int)Value;
}
void PPCInstPrinter::printU10ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned short Value = MI->getOperand(OpNo).getImm();
assert(Value <= 1023 && "Invalid u10imm argument!");
@@ -374,6 +411,7 @@ void PPCInstPrinter::printU10ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned short Value = MI->getOperand(OpNo).getImm();
assert(Value <= 4095 && "Invalid u12imm argument!");
@@ -381,14 +419,16 @@ void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
if (MI->getOperand(OpNo).isImm())
O << (short)MI->getOperand(OpNo).getImm();
else
- printOperand(MI, OpNo, O);
+ printOperand(MI, OpNo, STI, O);
}
void PPCInstPrinter::printS34ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
if (MI->getOperand(OpNo).isImm()) {
long long Value = MI->getOperand(OpNo).getImm();
@@ -396,21 +436,24 @@ void PPCInstPrinter::printS34ImmOperand(const MCInst *MI, unsigned OpNo,
O << (long long)Value;
}
else
- printOperand(MI, OpNo, O);
+ printOperand(MI, OpNo, STI, O);
}
void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
if (MI->getOperand(OpNo).isImm())
O << (unsigned short)MI->getOperand(OpNo).getImm();
else
- printOperand(MI, OpNo, O);
+ printOperand(MI, OpNo, STI, O);
}
void PPCInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
- unsigned OpNo, raw_ostream &O) {
+ unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
if (!MI->getOperand(OpNo).isImm())
- return printOperand(MI, OpNo, O);
+ return printOperand(MI, OpNo, STI, O);
int32_t Imm = SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
if (PrintBranchImmAsAddress) {
uint64_t Target = Address + Imm;
@@ -433,16 +476,16 @@ void PPCInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
}
void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
if (!MI->getOperand(OpNo).isImm())
- return printOperand(MI, OpNo, O);
+ return printOperand(MI, OpNo, STI, O);
O << SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
}
-
void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
unsigned CCReg = MI->getOperand(OpNo).getReg();
unsigned RegNo;
switch (CCReg) {
@@ -460,33 +503,37 @@ void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
}
void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printS16ImmOperand(MI, OpNo, O);
+ printS16ImmOperand(MI, OpNo, STI, O);
O << '(';
if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
O << "0";
else
- printOperand(MI, OpNo+1, O);
+ printOperand(MI, OpNo + 1, STI, O);
O << ')';
}
void PPCInstPrinter::printMemRegImm34PCRel(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printS34ImmOperand(MI, OpNo, O);
+ printS34ImmOperand(MI, OpNo, STI, O);
O << '(';
- printImmZeroOperand(MI, OpNo + 1, O);
+ printImmZeroOperand(MI, OpNo + 1, STI, O);
O << ')';
}
void PPCInstPrinter::printMemRegImm34(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printS34ImmOperand(MI, OpNo, O);
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printS34ImmOperand(MI, OpNo, STI, O);
O << '(';
- printOperand(MI, OpNo + 1, O);
+ printOperand(MI, OpNo + 1, STI, O);
O << ')';
}
void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
// When used as the base register, r0 reads constant zero rather than
// the value contained in the register. For this reason, the darwin
@@ -494,13 +541,13 @@ void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
if (MI->getOperand(OpNo).getReg() == PPC::R0)
O << "0";
else
- printOperand(MI, OpNo, O);
+ printOperand(MI, OpNo, STI, O);
O << ", ";
- printOperand(MI, OpNo+1, O);
+ printOperand(MI, OpNo + 1, STI, O);
}
void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
// On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must
// come at the _end_ of the expression.
const MCOperand &Op = MI->getOperand(OpNo);
@@ -513,10 +560,17 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
RefExp = cast<MCSymbolRefExpr>(Op.getExpr());
O << RefExp->getSymbol().getName();
+ // The variant kind VK_PPC_NOTOC needs to be handled as a special case
+ // because we do not want the assembly to print out the @notoc at the
+ // end like __tls_get_addr(x@tlsgd)@notoc. Instead we want it to look
+ // like __tls_get_addr@notoc(x@tlsgd).
+ if (RefExp->getKind() == MCSymbolRefExpr::VK_PPC_NOTOC)
+ O << '@' << MCSymbolRefExpr::getVariantKindName(RefExp->getKind());
O << '(';
- printOperand(MI, OpNo+1, O);
+ printOperand(MI, OpNo + 1, STI, O);
O << ')';
- if (RefExp->getKind() != MCSymbolRefExpr::VK_None)
+ if (RefExp->getKind() != MCSymbolRefExpr::VK_None &&
+ RefExp->getKind() != MCSymbolRefExpr::VK_PPC_NOTOC)
O << '@' << MCSymbolRefExpr::getVariantKindName(RefExp->getKind());
if (ConstExp != nullptr)
O << '+' << ConstExp->getValue();
@@ -525,7 +579,7 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
/// showRegistersWithPercentPrefix - Check if this register name should be
/// printed with a percentage symbol as prefix.
bool PPCInstPrinter::showRegistersWithPercentPrefix(const char *RegName) const {
- if (!FullRegNamesWithPercent || TT.isOSDarwin() || TT.getOS() == Triple::AIX)
+ if (!FullRegNamesWithPercent || TT.getOS() == Triple::AIX)
return false;
switch (RegName[0]) {
@@ -545,7 +599,7 @@ bool PPCInstPrinter::showRegistersWithPercentPrefix(const char *RegName) const {
const char *PPCInstPrinter::getVerboseConditionRegName(unsigned RegNum,
unsigned RegEncoding)
const {
- if (!TT.isOSDarwin() && !FullRegNames)
+ if (!FullRegNames)
return nullptr;
if (RegNum < PPC::CR0EQ || RegNum > PPC::CR7UN)
return nullptr;
@@ -567,11 +621,11 @@ const char *PPCInstPrinter::getVerboseConditionRegName(unsigned RegNum,
bool PPCInstPrinter::showRegistersWithPrefix() const {
if (TT.getOS() == Triple::AIX)
return false;
- return TT.isOSDarwin() || FullRegNamesWithPercent || FullRegNames;
+ return FullRegNamesWithPercent || FullRegNames;
}
void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
unsigned Reg = Op.getReg();
@@ -600,4 +654,3 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
assert(Op.isExpr() && "unknown operand kind in printOperand");
Op.getExpr()->print(O, &MAI);
}
-
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
index 9763aeceef94..5e9b01494416 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
@@ -36,45 +36,73 @@ public:
const MCSubtargetInfo &STI, raw_ostream &O) override;
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
+ void printInstruction(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS);
+ bool printAliasInstr(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &OS);
void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
unsigned OpIdx, unsigned PrintMethodIdx,
- raw_ostream &OS);
+ const MCSubtargetInfo &STI, raw_ostream &OS);
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O, const char *Modifier = nullptr);
- void printATBitsAsHint(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O,
+ const char *Modifier = nullptr);
+ void printATBitsAsHint(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU3ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU7ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU10ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU12ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printS34ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printImmZeroOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU1ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU2ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU3ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU4ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printS5ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU5ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU6ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU7ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU8ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU10ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU12ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printS16ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printS34ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printImmZeroOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printBranchOperand(const MCInst *MI, uint64_t Address, unsigned OpNo,
- raw_ostream &O);
- void printAbsBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printTLSCall(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printTLSCall(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
- void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printcrbitm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
- void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printMemRegImm34PCRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printMemRegImm34(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegImm(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMemRegImm34PCRel(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMemRegImm34(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMemRegReg(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
};
} // end namespace llvm
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 593dc2843c3d..2b76af279ce6 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -26,7 +26,8 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
if (is64Bit) {
CodePointerSize = CalleeSaveStackSlotSize = 8;
}
- IsLittleEndian = T.getArch() == Triple::ppc64le;
+ IsLittleEndian =
+ T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle;
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
@@ -56,7 +57,7 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
void PPCXCOFFMCAsmInfo::anchor() {}
PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) {
- if (T.getArch() == Triple::ppc64le)
+ if (T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle)
report_fatal_error("XCOFF is not supported for little-endian targets");
CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 8 : 4;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 27c687686641..48806051f581 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -13,7 +13,6 @@
#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCASMINFO_H
#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCASMINFO_H
-#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/MCAsmInfoELF.h"
#include "llvm/MC/MCAsmInfoXCOFF.h"
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index fb65e7320f2b..5f0769fd21f9 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -44,11 +44,13 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
+ if (MO.isReg() || MO.isImm())
+ return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::create(0, MO.getExpr(),
- ((MI.getOpcode() == PPC::BL8_NOTOC)
+ ((MI.getOpcode() == PPC::BL8_NOTOC ||
+ MI.getOpcode() == PPC::BL8_NOTOC_TLS)
? (MCFixupKind)PPC::fixup_ppc_br24_notoc
: (MCFixupKind)PPC::fixup_ppc_br24)));
return 0;
@@ -92,6 +94,16 @@ getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
return 0;
}
+unsigned
+PPCMCCodeEmitter::getVSRpEvenEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ assert(MI.getOperand(OpNo).isReg() && "Operand should be a register");
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI)
+ << 1;
+ return RegBits;
+}
+
unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -104,20 +116,36 @@ unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
return 0;
}
-uint64_t
-PPCMCCodeEmitter::getImm34Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+uint64_t PPCMCCodeEmitter::getImm34Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI,
+ MCFixupKind Fixup) const {
const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm())
+ assert(!MO.isReg() && "Not expecting a register for this operand.");
+ if (MO.isImm())
return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the immediate field.
- Fixups.push_back(MCFixup::create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_pcrel34));
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), Fixup));
return 0;
}
+uint64_t
+PPCMCCodeEmitter::getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getImm34Encoding(MI, OpNo, Fixups, STI,
+ (MCFixupKind)PPC::fixup_ppc_imm34);
+}
+
+uint64_t
+PPCMCCodeEmitter::getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getImm34Encoding(MI, OpNo, Fixups, STI,
+ (MCFixupKind)PPC::fixup_ppc_pcrel34);
+}
+
unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -213,8 +241,13 @@ PPCMCCodeEmitter::getMemRI34PCRelEncoding(const MCInst &MI, unsigned OpNo,
(void)SRE;
// Currently these are the only valid PCRelative Relocations.
assert((SRE->getKind() == MCSymbolRefExpr::VK_PCREL ||
- SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_PCREL) &&
- "VariantKind must be VK_PCREL or VK_PPC_GOT_PCREL");
+ SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_PCREL ||
+ SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL ||
+ SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL ||
+ SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL) &&
+ "VariantKind must be VK_PCREL or VK_PPC_GOT_PCREL or "
+ "VK_PPC_GOT_TLSGD_PCREL or VK_PPC_GOT_TLSLD_PCREL or "
+ "VK_PPC_GOT_TPREL_PCREL.");
// Generate the fixup for the relocation.
Fixups.push_back(
MCFixup::create(0, Expr,
@@ -326,8 +359,12 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
// Add a fixup for the TLS register, which simply provides a relocation
// hint to the linker that this statement is part of a relocation sequence.
- // Return the thread-pointer register's encoding.
- Fixups.push_back(MCFixup::create(0, MO.getExpr(),
+ // Return the thread-pointer register's encoding. Add a one byte displacement
+ // if using PC relative memops.
+ const MCExpr *Expr = MO.getExpr();
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(Expr);
+ bool IsPCRel = SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS_PCREL;
+ Fixups.push_back(MCFixup::create(IsPCRel ? 1 : 0, Expr,
(MCFixupKind)PPC::fixup_ppc_nofixup));
const Triple &TT = STI.getTargetTriple();
bool isPPC64 = TT.isPPC64();
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
index 588aa76bd806..347e163c9515 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
@@ -52,7 +52,14 @@ public:
const MCSubtargetInfo &STI) const;
uint64_t getImm34Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ const MCSubtargetInfo &STI,
+ MCFixupKind Fixup) const;
+ uint64_t getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint64_t getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -86,6 +93,9 @@ public:
unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getVSRpEvenEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 3092d56da1c5..bf9c6feb541e 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -20,8 +20,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
@@ -30,6 +30,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -77,7 +78,17 @@ static MCRegisterInfo *createPPCMCRegisterInfo(const Triple &TT) {
static MCSubtargetInfo *createPPCMCSubtargetInfo(const Triple &TT,
StringRef CPU, StringRef FS) {
- return createPPCMCSubtargetInfoImpl(TT, CPU, FS);
+ // Set some default feature to MC layer.
+ std::string FullFS = std::string(FS);
+
+ if (TT.isOSAIX()) {
+ if (!FullFS.empty())
+ FullFS = "+aix," + FullFS;
+ else
+ FullFS = "+aix";
+ }
+
+ return createPPCMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FullFS);
}
static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI,
@@ -122,11 +133,12 @@ public:
void emitTCEntry(const MCSymbol &S) override {
if (const MCSymbolXCOFF *XSym = dyn_cast<MCSymbolXCOFF>(&S)) {
MCSymbolXCOFF *TCSym =
- cast<MCSymbolXCOFF>(Streamer.getContext().getOrCreateSymbol(
- XSym->getSymbolTableName() + "[TC]"));
+ cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly())
+ ->getQualNameSymbol();
+ OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n';
+
if (TCSym->hasRename())
Streamer.emitXCOFFRenameDirective(TCSym, TCSym->getSymbolTableName());
- OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n';
return;
}
@@ -334,8 +346,8 @@ static MCInstPrinter *createPPCMCInstPrinter(const Triple &T,
}
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetMC() {
- for (Target *T :
- {&getThePPC32Target(), &getThePPC64Target(), &getThePPC64LETarget()}) {
+ for (Target *T : {&getThePPC32Target(), &getThePPC32LETarget(),
+ &getThePPC64Target(), &getThePPC64LETarget()}) {
// Register the MC asm info.
RegisterMCAsmInfoFn C(*T, createPPCMCAsmInfo);
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 719e005d9813..03b316341717 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -124,6 +124,11 @@ static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) {
#define GET_SUBTARGETINFO_ENUM
#include "PPCGenSubtargetInfo.inc"
+#define PPC_REGS0_7(X) \
+ { \
+ X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7 \
+ }
+
#define PPC_REGS0_31(X) \
{ \
X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \
@@ -156,10 +161,10 @@ using llvm::MCPhysReg;
static const MCPhysReg RRegs[32] = PPC_REGS0_31(PPC::R); \
static const MCPhysReg XRegs[32] = PPC_REGS0_31(PPC::X); \
static const MCPhysReg FRegs[32] = PPC_REGS0_31(PPC::F); \
+ static const MCPhysReg VSRpRegs[32] = PPC_REGS0_31(PPC::VSRp); \
static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \
static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \
static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \
- static const MCPhysReg QFRegs[32] = PPC_REGS0_31(PPC::QF); \
static const MCPhysReg RRegsNoR0[32] = \
PPC_REGS_NO0_31(PPC::ZERO, PPC::R); \
static const MCPhysReg XRegsNoX0[32] = \
@@ -179,8 +184,6 @@ using llvm::MCPhysReg;
PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, \
PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, \
PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN}; \
- static const MCPhysReg CRRegs[8] = { \
- PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, \
- PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7}
-
+ static const MCPhysReg CRRegs[8] = PPC_REGS0_7(PPC::CR); \
+ static const MCPhysReg ACCRegs[8] = PPC_REGS0_7(PPC::ACC)
#endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
index d672d54772e0..77b0331bb14c 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -58,14 +58,19 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
switch ((unsigned)Fixup.getKind()) {
default:
report_fatal_error("Unimplemented fixup kind.");
- case PPC::fixup_ppc_half16:
+ case PPC::fixup_ppc_half16: {
+ const uint8_t SignAndSizeForHalf16 = EncodedSignednessIndicator | 15;
switch (Modifier) {
default:
report_fatal_error("Unsupported modifier for half16 fixup.");
case MCSymbolRefExpr::VK_None:
- return {XCOFF::RelocationType::R_TOC, EncodedSignednessIndicator | 15};
+ return {XCOFF::RelocationType::R_TOC, SignAndSizeForHalf16};
+ case MCSymbolRefExpr::VK_PPC_U:
+ return {XCOFF::RelocationType::R_TOCU, SignAndSizeForHalf16};
+ case MCSymbolRefExpr::VK_PPC_L:
+ return {XCOFF::RelocationType::R_TOCL, SignAndSizeForHalf16};
}
- break;
+ } break;
case PPC::fixup_ppc_br24:
// Branches are 4 byte aligned, so the 24 bits we encode in
// the instruction actually represents a 26 bit offset.
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index d7e3519d5539..63531f72adfb 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -94,7 +94,7 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
(instregex "CMPRB(8)?$"),
(instregex "TD(I)?$"),
(instregex "TW(I)?$"),
- (instregex "FCMPU(S|D)$"),
+ (instregex "FCMP(O|U)(S|D)$"),
(instregex "XSTSTDC(S|D)P$"),
FTDIV,
FTSQRT,
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index 7e0aa2c6061d..264582b244a7 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -20,17 +20,20 @@
#undef PPC
namespace llvm {
- class PPCTargetMachine;
- class PassRegistry;
- class FunctionPass;
- class MachineInstr;
- class MachineOperand;
- class AsmPrinter;
- class MCInst;
- class MCOperand;
- class ModulePass;
-
- FunctionPass *createPPCCTRLoops();
+class PPCRegisterBankInfo;
+class PPCSubtarget;
+class PPCTargetMachine;
+class PassRegistry;
+class FunctionPass;
+class InstructionSelector;
+class MachineInstr;
+class MachineOperand;
+class AsmPrinter;
+class MCInst;
+class MCOperand;
+class ModulePass;
+
+FunctionPass *createPPCCTRLoops();
#ifndef NDEBUG
FunctionPass *createPPCCTRLoopsVerify();
#endif
@@ -44,7 +47,6 @@ namespace llvm {
FunctionPass *createPPCMIPeepholePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCBranchCoalescingPass();
- FunctionPass *createPPCQPXLoadSplatPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL);
FunctionPass *createPPCTLSDynamicCallPass();
FunctionPass *createPPCBoolRetToIntPass();
@@ -68,7 +70,6 @@ namespace llvm {
void initializePPCReduceCRLogicalsPass(PassRegistry&);
void initializePPCBSelPass(PassRegistry&);
void initializePPCBranchCoalescingPass(PassRegistry&);
- void initializePPCQPXLoadSplatPass(PassRegistry&);
void initializePPCBoolRetToIntPass(PassRegistry&);
void initializePPCExpandISELPass(PassRegistry &);
void initializePPCPreEmitPeepholePass(PassRegistry &);
@@ -80,7 +81,10 @@ namespace llvm {
ModulePass *createPPCLowerMASSVEntriesPass();
void initializePPCLowerMASSVEntriesPass(PassRegistry &);
extern char &PPCLowerMASSVEntriesID;
-
+
+ InstructionSelector *
+ createPPCInstructionSelector(const PPCTargetMachine &, const PPCSubtarget &,
+ const PPCRegisterBankInfo &);
namespace PPCII {
/// Target Operand Flag enum.
@@ -107,6 +111,37 @@ namespace llvm {
/// produce the relocation @got@pcrel. Fixup is VK_PPC_GOT_PCREL.
MO_GOT_FLAG = 8,
+ // MO_PCREL_OPT_FLAG - If this bit is set the operand is part of a
+ // PC Relative linker optimization.
+ MO_PCREL_OPT_FLAG = 16,
+
+ /// MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to
+ /// TLS General Dynamic model.
+ MO_TLSGD_FLAG = 32,
+
+ /// MO_TPREL_FLAG - If this bit is set the symbol reference is relative to
+ /// TLS Initial Exec model.
+ MO_TPREL_FLAG = 64,
+
+ /// MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to
+ /// TLS Local Dynamic model.
+ MO_TLSLD_FLAG = 128,
+
+ /// MO_GOT_TLSGD_PCREL_FLAG - A combintaion of flags, if these bits are set
+ /// they should produce the relocation @got@tlsgd@pcrel.
+ /// Fix up is VK_PPC_GOT_TLSGD_PCREL
+ MO_GOT_TLSGD_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG | MO_TLSGD_FLAG,
+
+ /// MO_GOT_TLSLD_PCREL_FLAG - A combintaion of flags, if these bits are set
+ /// they should produce the relocation @got@tlsld@pcrel.
+ /// Fix up is VK_PPC_GOT_TLSLD_PCREL
+ MO_GOT_TLSLD_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG | MO_TLSLD_FLAG,
+
+ /// MO_GOT_TPREL_PCREL_FLAG - A combintaion of flags, if these bits are set
+ /// they should produce the relocation @got@tprel@pcrel.
+ /// Fix up is VK_PPC_GOT_TPREL_PCREL
+ MO_GOT_TPREL_PCREL_FLAG = MO_GOT_FLAG | MO_TPREL_FLAG | MO_PCREL_FLAG,
+
/// The next are not flags but distinct values.
MO_ACCESS_MASK = 0xf00,
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 9ad78bf67fe6..1e6ded231585 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -57,6 +57,10 @@ def DirectivePwrFuture
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
"Enable 64-bit instructions">;
+def AIXOS: SubtargetFeature<"aix", "IsAIX", "true", "AIX OS">;
+def FeatureModernAIXAs
+ : SubtargetFeature<"modern-aix-as", "HasModernAIXAs", "true",
+ "AIX system assembler is modern enough to support new mnes">;
def FeatureHardFloat : SubtargetFeature<"hard-float", "HasHardFloat", "true",
"Enable floating-point instructions">;
def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
@@ -72,6 +76,9 @@ def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
def FeatureSPE : SubtargetFeature<"spe","HasSPE", "true",
"Enable SPE instructions",
[FeatureHardFloat]>;
+def FeatureEFPU2 : SubtargetFeature<"efpu2", "HasEFPU2", "true",
+ "Enable Embedded Floating-Point APU 2 instructions",
+ [FeatureSPE]>;
def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
"Enable the MFOCRF instruction">;
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
@@ -132,9 +139,6 @@ def FeaturePPC4xx : SubtargetFeature<"ppc4xx", "IsPPC4xx", "true",
"Enable PPC 4xx instructions">;
def FeaturePPC6xx : SubtargetFeature<"ppc6xx", "IsPPC6xx", "true",
"Enable PPC 6xx instructions">;
-def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
- "Enable QPX instructions",
- [FeatureFPU]>;
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
"Enable VSX instructions",
[FeatureAltivec]>;
@@ -177,6 +181,9 @@ def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load",
"HasAddisLoadFusion", "true",
"Power8 Addis-Load fusion",
[FeatureFusion]>;
+def FeatureStoreFusion : SubtargetFeature<"fuse-store", "HasStoreFusion", "true",
+ "Target supports store clustering",
+ [FeatureFusion]>;
def FeatureUnalignedFloats :
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
"true", "CPU does not trap on unaligned FP access">;
@@ -193,7 +200,7 @@ def FeatureFloat128 :
def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD",
"POPCNTD_Fast",
"Enable the popcnt[dw] instructions">;
-// Note that for the a2/a2q processor models we should not use popcnt[dw] by
+// Note that for the a2 processor models we should not use popcnt[dw] by
// default. These processors do support the instructions, but they're
// microcoded, and the software emulation is about twice as fast.
def FeatureSlowPOPCNTD : SubtargetFeature<"slow-popcntd","HasPOPCNTD",
@@ -236,7 +243,15 @@ def FeaturePrefixInstrs : SubtargetFeature<"prefix-instrs", "HasPrefixInstrs",
def FeaturePCRelativeMemops :
SubtargetFeature<"pcrelative-memops", "HasPCRelativeMemops", "true",
"Enable PC relative Memory Ops",
+ [FeatureISA3_0, FeaturePrefixInstrs]>;
+def FeaturePairedVectorMemops:
+ SubtargetFeature<"paired-vector-memops", "PairedVectorMemops", "true",
+ "32Byte load and store instructions",
[FeatureISA3_0]>;
+def FeatureMMA : SubtargetFeature<"mma", "HasMMA", "true",
+ "Enable MMA instructions",
+ [FeatureP8Vector, FeatureP9Altivec,
+ FeaturePairedVectorMemops]>;
def FeaturePredictableSelectIsExpensive :
SubtargetFeature<"predictable-select-expensive",
@@ -320,6 +335,8 @@ def ProcessorFeatures {
[DirectivePwr9,
FeatureP9Altivec,
FeatureP9Vector,
+ FeaturePPCPreRASched,
+ FeaturePPCPostRASched,
FeatureISA3_0,
FeaturePredictableSelectIsExpensive
];
@@ -329,9 +346,7 @@ def ProcessorFeatures {
// dispatch for vector operations than scalar ones. For the time being,
// this list also includes scheduling-related features since we do not have
// enough info to create custom scheduling strategies for future CPUs.
- list<SubtargetFeature> P9SpecificFeatures = [FeatureVectorsUseTwoUnits,
- FeaturePPCPreRASched,
- FeaturePPCPostRASched];
+ list<SubtargetFeature> P9SpecificFeatures = [FeatureVectorsUseTwoUnits];
list<SubtargetFeature> P9InheritableFeatures =
!listconcat(P8InheritableFeatures, P9AdditionalFeatures);
list<SubtargetFeature> P9Features =
@@ -340,9 +355,12 @@ def ProcessorFeatures {
// Power10
// For P10 CPU we assume that all of the existing features from Power9
// still exist with the exception of those we know are Power9 specific.
+ list<SubtargetFeature> FusionFeatures = [FeatureStoreFusion];
list<SubtargetFeature> P10AdditionalFeatures =
- [DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs,
- FeaturePCRelativeMemops, FeatureP10Vector];
+ !listconcat(FusionFeatures, [
+ DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs,
+ FeaturePCRelativeMemops, FeatureP10Vector, FeatureMMA,
+ FeaturePairedVectorMemops]);
list<SubtargetFeature> P10SpecificFeatures = [];
list<SubtargetFeature> P10InheritableFeatures =
!listconcat(P9InheritableFeatures, P10AdditionalFeatures);
@@ -427,6 +445,7 @@ def getAltVSXFMAOpcode : InstrMapping {
include "PPCRegisterInfo.td"
include "PPCSchedule.td"
+include "GISel/PPCRegisterBanks.td"
//===----------------------------------------------------------------------===//
// PowerPC processors supported.
@@ -514,15 +533,6 @@ def : ProcessorModel<"a2", PPCA2Model,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>;
-def : ProcessorModel<"a2q", PPCA2Model,
- [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
- FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
- FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
- FeatureSTFIWX, FeatureLFIWAX,
- FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */, FeatureQPX,
- FeatureMFTB]>;
def : ProcessorModel<"pwr3", G5Model,
[DirectivePwr3, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
@@ -561,7 +571,7 @@ def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>;
def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>;
// No scheduler model yet.
-def : ProcessorModel<"pwr10", NoSchedModel, ProcessorFeatures.P10Features>;
+def : ProcessorModel<"pwr10", P9Model, ProcessorFeatures.P10Features>;
// No scheduler model for future CPU.
def : ProcessorModel<"future", NoSchedModel,
ProcessorFeatures.FutureFeatures>;
@@ -592,6 +602,13 @@ def PPCInstrInfo : InstrInfo {
let noNamedPositionallyEncodedOperands = 1;
}
+def PPCAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int PassSubtarget = 1;
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+
def PPCAsmParser : AsmParser {
let ShouldEmitMatchRegisterName = 0;
}
@@ -610,6 +627,7 @@ def PPC : Target {
// Information about the instructions.
let InstructionSet = PPCInstrInfo;
+ let AssemblyWriters = [PPCAsmWriter];
let AssemblyParsers = [PPCAsmParser];
let AssemblyParserVariants = [PPCAsmParserVariant];
let AllowRegisterRenaming = 1;
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index bf5fe741bac8..cce21f32414a 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -27,11 +27,11 @@
#include "PPCTargetStreamer.h"
#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/BinaryFormat/MachO.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -47,11 +47,11 @@
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -62,9 +62,11 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Process.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -72,6 +74,7 @@
#include <new>
using namespace llvm;
+using namespace llvm::XCOFF;
#define DEBUG_TYPE "asmprinter"
@@ -147,7 +150,21 @@ public:
class PPCAIXAsmPrinter : public PPCAsmPrinter {
private:
+ /// Symbols lowered from ExternalSymbolSDNodes, we will need to emit extern
+ /// linkage for them in AIX.
+ SmallPtrSet<MCSymbol *, 8> ExtSymSDNodeSymbols;
+
+ /// A format indicator and unique trailing identifier to form part of the
+ /// sinit/sterm function names.
+ std::string FormatIndicatorAndUniqueModId;
+
static void ValidateGV(const GlobalVariable *GV);
+ // Record a list of GlobalAlias associated with a GlobalObject.
+ // This is used for AIX's extra-label-at-definition aliasing strategy.
+ DenseMap<const GlobalObject *, SmallVector<const GlobalAlias *, 1>>
+ GOAliasMap;
+
+ void emitTracebackTable();
public:
PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
@@ -161,15 +178,28 @@ public:
bool doInitialization(Module &M) override;
+ void emitXXStructorList(const DataLayout &DL, const Constant *List,
+ bool IsCtor) override;
+
void SetupMachineFunction(MachineFunction &MF) override;
void emitGlobalVariable(const GlobalVariable *GV) override;
void emitFunctionDescriptor() override;
+ void emitFunctionEntryLabel() override;
+
+ void emitFunctionBodyEnd() override;
+
void emitEndOfAsmFile(Module &) override;
void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const override;
+
+ void emitInstruction(const MachineInstr *MI) override;
+
+ bool doFinalization(Module &M) override;
+
+ void emitTTypeReference(const GlobalValue *GV, unsigned Encoding) override;
};
} // end anonymous namespace
@@ -463,6 +493,14 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
StringRef Name = "__tls_get_addr";
MCSymbol *TlsGetAddr = OutContext.getOrCreateSymbol(Name);
MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+ unsigned Opcode = PPC::BL8_NOP_TLS;
+
+ assert(MI->getNumOperands() >= 3 && "Expecting at least 3 operands from MI");
+ if (MI->getOperand(2).getTargetFlags() == PPCII::MO_GOT_TLSGD_PCREL_FLAG ||
+ MI->getOperand(2).getTargetFlags() == PPCII::MO_GOT_TLSLD_PCREL_FLAG) {
+ Kind = MCSymbolRefExpr::VK_PPC_NOTOC;
+ Opcode = PPC::BL8_NOTOC_TLS;
+ }
const Module *M = MF->getFunction().getParent();
assert(MI->getOperand(0).isReg() &&
@@ -490,10 +528,10 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymVar = MCSymbolRefExpr::create(MOSymbol, VK, OutContext);
EmitToStreamer(*OutStreamer,
- MCInstBuilder(Subtarget->isPPC64() ?
- PPC::BL8_NOP_TLS : PPC::BL_TLS)
- .addExpr(TlsRef)
- .addExpr(SymVar));
+ MCInstBuilder(Subtarget->isPPC64() ? Opcode
+ : (unsigned)PPC::BL_TLS)
+ .addExpr(TlsRef)
+ .addExpr(SymVar));
}
/// Map a machine operand for a TOC pseudo-machine instruction to its
@@ -533,9 +571,6 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
if (Subtarget->hasSPE()) {
if (PPC::F4RCRegClass.contains(Reg) ||
PPC::F8RCRegClass.contains(Reg) ||
- PPC::QBRCRegClass.contains(Reg) ||
- PPC::QFRCRegClass.contains(Reg) ||
- PPC::QSRCRegClass.contains(Reg) ||
PPC::VFRCRegClass.contains(Reg) ||
PPC::VRRCRegClass.contains(Reg) ||
PPC::VSFRCRegClass.contains(Reg) ||
@@ -550,6 +585,38 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
}
}
#endif
+
+ auto getTOCRelocAdjustedExprForXCOFF = [this](const MCExpr *Expr,
+ ptrdiff_t OriginalOffset) {
+ // Apply an offset to the TOC-based expression such that the adjusted
+ // notional offset from the TOC base (to be encoded into the instruction's D
+ // or DS field) is the signed 16-bit truncation of the original notional
+ // offset from the TOC base.
+ // This is consistent with the treatment used both by XL C/C++ and
+ // by AIX ld -r.
+ ptrdiff_t Adjustment =
+ OriginalOffset - llvm::SignExtend32<16>(OriginalOffset);
+ return MCBinaryExpr::createAdd(
+ Expr, MCConstantExpr::create(-Adjustment, OutContext), OutContext);
+ };
+
+ auto getTOCEntryLoadingExprForXCOFF =
+ [IsPPC64, getTOCRelocAdjustedExprForXCOFF,
+ this](const MCSymbol *MOSymbol, const MCExpr *Expr) -> const MCExpr * {
+ const unsigned EntryByteSize = IsPPC64 ? 8 : 4;
+ const auto TOCEntryIter = TOC.find(MOSymbol);
+ assert(TOCEntryIter != TOC.end() &&
+ "Could not find the TOC entry for this symbol.");
+ const ptrdiff_t EntryDistanceFromTOCBase =
+ (TOCEntryIter - TOC.begin()) * EntryByteSize;
+ constexpr int16_t PositiveTOCRange = INT16_MAX;
+
+ if (EntryDistanceFromTOCBase > PositiveTOCRange)
+ return getTOCRelocAdjustedExprForXCOFF(Expr, EntryDistanceFromTOCBase);
+
+ return Expr;
+ };
+
// Lower multi-instruction pseudo operations.
switch (MI->getOpcode()) {
default: break;
@@ -696,6 +763,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
assert(
TM.getCodeModel() == CodeModel::Small &&
"This pseudo should only be selected for 32-bit small code model.");
+ Exp = getTOCEntryLoadingExprForXCOFF(MOSymbol, Exp);
TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
@@ -724,17 +792,20 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
"Invalid operand!");
+ // Map the operand to its corresponding MCSymbol.
+ const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+
// Map the machine operand to its corresponding MCSymbol, then map the
// global address operand to be a reference to the TOC entry we will
// synthesize later.
- MCSymbol *TOCEntry =
- lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO, *this));
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
const MCSymbolRefExpr::VariantKind VK =
IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC;
const MCExpr *Exp =
MCSymbolRefExpr::create(TOCEntry, VK, OutContext);
- TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+ TmpInst.getOperand(1) = MCOperand::createExpr(
+ IsAIX ? getTOCEntryLoadingExprForXCOFF(MOSymbol, Exp) : Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
@@ -1010,6 +1081,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
case PPC::GETtlsADDR:
// Transform: %x3 = GETtlsADDR %x3, @sym
// Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd)
+ case PPC::GETtlsADDRPCREL:
case PPC::GETtlsADDR32: {
// Transform: %r3 = GETtlsADDR32 %r3, @sym
// Into: BL_TLS __tls_get_addr(sym at tlsgd)@PLT
@@ -1055,6 +1127,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
case PPC::GETtlsldADDR:
// Transform: %x3 = GETtlsldADDR %x3, @sym
// Into: BL8_NOP_TLS __tls_get_addr(sym at tlsld)
+ case PPC::GETtlsldADDRPCREL:
case PPC::GETtlsldADDR32: {
// Transform: %r3 = GETtlsldADDR32 %r3, @sym
// Into: BL_TLS __tls_get_addr(sym at tlsld)@PLT
@@ -1081,6 +1154,21 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
.addExpr(SymDtprel));
return;
}
+ case PPC::PADDIdtprel: {
+ // Transform: %rd = PADDIdtprel %rs, @sym
+ // Into: %rd = PADDI8 %rs, sym@dtprel
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymDtprel = MCSymbolRefExpr::create(
+ MOSymbol, MCSymbolRefExpr::VK_DTPREL, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::PADDI8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymDtprel));
+ return;
+ }
+
case PPC::ADDIdtprelL:
// Transform: %xd = ADDIdtprelL %xs, @sym
// Into: %xd = ADDI8 %xs, sym@dtprel@l
@@ -1137,10 +1225,6 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
case PPC::LWA: {
// Verify alignment is legal, so we don't create relocations
// that can't be supported.
- // FIXME: This test is currently disabled for Darwin. The test
- // suite shows a handful of test cases that fail this check for
- // Darwin. Those need to be investigated before this sanity test
- // can be enabled for those subtargets.
unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
const MachineOperand &MO = MI->getOperand(OpNum);
if (MO.isGlobal()) {
@@ -1621,17 +1705,19 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
assert(LinkageAttr != MCSA_Invalid && "LinkageAttr should not MCSA_Invalid.");
MCSymbolAttr VisibilityAttr = MCSA_Invalid;
- switch (GV->getVisibility()) {
+ if (!TM.getIgnoreXCOFFVisibility()) {
+ switch (GV->getVisibility()) {
- // TODO: "exported" and "internal" Visibility needs to go here.
- case GlobalValue::DefaultVisibility:
- break;
- case GlobalValue::HiddenVisibility:
- VisibilityAttr = MAI->getHiddenVisibilityAttr();
- break;
- case GlobalValue::ProtectedVisibility:
- VisibilityAttr = MAI->getProtectedVisibilityAttr();
- break;
+ // TODO: "exported" and "internal" Visibility needs to go here.
+ case GlobalValue::DefaultVisibility:
+ break;
+ case GlobalValue::HiddenVisibility:
+ VisibilityAttr = MAI->getHiddenVisibilityAttr();
+ break;
+ case GlobalValue::ProtectedVisibility:
+ VisibilityAttr = MAI->getProtectedVisibilityAttr();
+ break;
+ }
}
OutStreamer->emitXCOFFSymbolLinkageWithVisibility(GVSym, LinkageAttr,
@@ -1650,18 +1736,305 @@ void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) {
return AsmPrinter::SetupMachineFunction(MF);
}
+void PPCAIXAsmPrinter::emitFunctionBodyEnd() {
+
+ if (!TM.getXCOFFTracebackTable())
+ return;
+
+ emitTracebackTable();
+}
+
+void PPCAIXAsmPrinter::emitTracebackTable() {
+
+ // Create a symbol for the end of function.
+ MCSymbol *FuncEnd = createTempSymbol(MF->getName());
+ OutStreamer->emitLabel(FuncEnd);
+
+ OutStreamer->AddComment("Traceback table begin");
+ // Begin with a fullword of zero.
+ OutStreamer->emitIntValueInHexWithPadding(0, 4 /*size*/);
+
+ SmallString<128> CommentString;
+ raw_svector_ostream CommentOS(CommentString);
+
+ auto EmitComment = [&]() {
+ OutStreamer->AddComment(CommentOS.str());
+ CommentString.clear();
+ };
+
+ auto EmitCommentAndValue = [&](uint64_t Value, int Size) {
+ EmitComment();
+ OutStreamer->emitIntValueInHexWithPadding(Value, Size);
+ };
+
+ unsigned int Version = 0;
+ CommentOS << "Version = " << Version;
+ EmitCommentAndValue(Version, 1);
+
+ // There is a lack of information in the IR to assist with determining the
+ // source language. AIX exception handling mechanism would only search for
+ // personality routine and LSDA area when such language supports exception
+ // handling. So to be conservatively correct and allow runtime to do its job,
+ // we need to set it to C++ for now.
+ TracebackTable::LanguageID LanguageIdentifier =
+ TracebackTable::CPlusPlus; // C++
+
+ CommentOS << "Language = "
+ << getNameForTracebackTableLanguageId(LanguageIdentifier);
+ EmitCommentAndValue(LanguageIdentifier, 1);
+
+ // This is only populated for the third and fourth bytes.
+ uint32_t FirstHalfOfMandatoryField = 0;
+
+ // Emit the 3rd byte of the mandatory field.
+
+ // We always set traceback offset bit to true.
+ FirstHalfOfMandatoryField |= TracebackTable::HasTraceBackTableOffsetMask;
+
+ const PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Check the function uses floating-point processor instructions or not
+ for (unsigned Reg = PPC::F0; Reg <= PPC::F31; ++Reg) {
+ if (MRI.isPhysRegUsed(Reg)) {
+ FirstHalfOfMandatoryField |= TracebackTable::IsFloatingPointPresentMask;
+ break;
+ }
+ }
+
+#define GENBOOLCOMMENT(Prefix, V, Field) \
+ CommentOS << (Prefix) << ((V) & (TracebackTable::Field##Mask) ? "+" : "-") \
+ << #Field
+
+#define GENVALUECOMMENT(PrefixAndName, V, Field) \
+ CommentOS << (PrefixAndName) << " = " \
+ << static_cast<unsigned>(((V) & (TracebackTable::Field##Mask)) >> \
+ (TracebackTable::Field##Shift))
+
+ GENBOOLCOMMENT("", FirstHalfOfMandatoryField, IsGlobaLinkage);
+ GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsOutOfLineEpilogOrPrologue);
+ EmitComment();
+
+ GENBOOLCOMMENT("", FirstHalfOfMandatoryField, HasTraceBackTableOffset);
+ GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsInternalProcedure);
+ EmitComment();
+
+ GENBOOLCOMMENT("", FirstHalfOfMandatoryField, HasControlledStorage);
+ GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsTOCless);
+ EmitComment();
+
+ GENBOOLCOMMENT("", FirstHalfOfMandatoryField, IsFloatingPointPresent);
+ EmitComment();
+ GENBOOLCOMMENT("", FirstHalfOfMandatoryField,
+ IsFloatingPointOperationLogOrAbortEnabled);
+ EmitComment();
+
+ OutStreamer->emitIntValueInHexWithPadding(
+ (FirstHalfOfMandatoryField & 0x0000ff00) >> 8, 1);
+
+ // Set the 4th byte of the mandatory field.
+ FirstHalfOfMandatoryField |= TracebackTable::IsFunctionNamePresentMask;
+
+ static_assert(XCOFF::AllocRegNo == 31, "Unexpected register usage!");
+ if (MRI.isPhysRegUsed(Subtarget->isPPC64() ? PPC::X31 : PPC::R31))
+ FirstHalfOfMandatoryField |= TracebackTable::IsAllocaUsedMask;
+
+ const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
+ if (!MustSaveCRs.empty())
+ FirstHalfOfMandatoryField |= TracebackTable::IsCRSavedMask;
+
+ if (FI->mustSaveLR())
+ FirstHalfOfMandatoryField |= TracebackTable::IsLRSavedMask;
+
+ GENBOOLCOMMENT("", FirstHalfOfMandatoryField, IsInterruptHandler);
+ GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsFunctionNamePresent);
+ GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsAllocaUsed);
+ EmitComment();
+ GENVALUECOMMENT("OnConditionDirective", FirstHalfOfMandatoryField,
+ OnConditionDirective);
+ GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsCRSaved);
+ GENBOOLCOMMENT(", ", FirstHalfOfMandatoryField, IsLRSaved);
+ EmitComment();
+ OutStreamer->emitIntValueInHexWithPadding((FirstHalfOfMandatoryField & 0xff),
+ 1);
+
+ // Set the 5th byte of mandatory field.
+ uint32_t SecondHalfOfMandatoryField = 0;
+
+ // Always store back chain.
+ SecondHalfOfMandatoryField |= TracebackTable::IsBackChainStoredMask;
+
+ uint32_t FPRSaved = 0;
+ for (unsigned Reg = PPC::F14; Reg <= PPC::F31; ++Reg) {
+ if (MRI.isPhysRegModified(Reg)) {
+ FPRSaved = PPC::F31 - Reg + 1;
+ break;
+ }
+ }
+ SecondHalfOfMandatoryField |= (FPRSaved << TracebackTable::FPRSavedShift) &
+ TracebackTable::FPRSavedMask;
+ GENBOOLCOMMENT("", SecondHalfOfMandatoryField, IsBackChainStored);
+ GENBOOLCOMMENT(", ", SecondHalfOfMandatoryField, IsFixup);
+ GENVALUECOMMENT(", NumOfFPRsSaved", SecondHalfOfMandatoryField, FPRSaved);
+ EmitComment();
+ OutStreamer->emitIntValueInHexWithPadding(
+ (SecondHalfOfMandatoryField & 0xff000000) >> 24, 1);
+
+ // Set the 6th byte of mandatory field.
+ bool ShouldEmitEHBlock = TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(MF);
+ if (ShouldEmitEHBlock)
+ SecondHalfOfMandatoryField |= TracebackTable::HasExtensionTableMask;
+
+ uint32_t GPRSaved = 0;
+
+ // X13 is reserved under 64-bit environment.
+ unsigned GPRBegin = Subtarget->isPPC64() ? PPC::X14 : PPC::R13;
+ unsigned GPREnd = Subtarget->isPPC64() ? PPC::X31 : PPC::R31;
+
+ for (unsigned Reg = GPRBegin; Reg <= GPREnd; ++Reg) {
+ if (MRI.isPhysRegModified(Reg)) {
+ GPRSaved = GPREnd - Reg + 1;
+ break;
+ }
+ }
+
+ SecondHalfOfMandatoryField |= (GPRSaved << TracebackTable::GPRSavedShift) &
+ TracebackTable::GPRSavedMask;
+
+ GENBOOLCOMMENT("", SecondHalfOfMandatoryField, HasVectorInfo);
+ GENBOOLCOMMENT(", ", SecondHalfOfMandatoryField, HasExtensionTable);
+ GENVALUECOMMENT(", NumOfGPRsSaved", SecondHalfOfMandatoryField, GPRSaved);
+ EmitComment();
+ OutStreamer->emitIntValueInHexWithPadding(
+ (SecondHalfOfMandatoryField & 0x00ff0000) >> 16, 1);
+
+ // Set the 7th byte of mandatory field.
+ uint32_t NumberOfFixedPara = FI->getFixedParamNum();
+ SecondHalfOfMandatoryField |=
+ (NumberOfFixedPara << TracebackTable::NumberOfFixedParmsShift) &
+ TracebackTable::NumberOfFixedParmsMask;
+ GENVALUECOMMENT("NumberOfFixedParms", SecondHalfOfMandatoryField,
+ NumberOfFixedParms);
+ EmitComment();
+ OutStreamer->emitIntValueInHexWithPadding(
+ (SecondHalfOfMandatoryField & 0x0000ff00) >> 8, 1);
+
+ // Set the 8th byte of mandatory field.
+
+ // Always set parameter on stack.
+ SecondHalfOfMandatoryField |= TracebackTable::HasParmsOnStackMask;
+
+ uint32_t NumberOfFPPara = FI->getFloatingPointParamNum();
+ SecondHalfOfMandatoryField |=
+ (NumberOfFPPara << TracebackTable::NumberOfFloatingPointParmsShift) &
+ TracebackTable::NumberOfFloatingPointParmsMask;
+
+ GENVALUECOMMENT("NumberOfFPParms", SecondHalfOfMandatoryField,
+ NumberOfFloatingPointParms);
+ GENBOOLCOMMENT(", ", SecondHalfOfMandatoryField, HasParmsOnStack);
+ EmitComment();
+ OutStreamer->emitIntValueInHexWithPadding(SecondHalfOfMandatoryField & 0xff,
+ 1);
+
+ // Generate the optional fields of traceback table.
+
+ // Parameter type.
+ if (NumberOfFixedPara || NumberOfFPPara) {
+ assert((SecondHalfOfMandatoryField & TracebackTable::HasVectorInfoMask) ==
+ 0 &&
+ "VectorInfo has not been implemented.");
+ uint32_t ParaType = FI->getParameterType();
+ CommentOS << "Parameter type = "
+ << XCOFF::parseParmsType(ParaType,
+ NumberOfFixedPara + NumberOfFPPara);
+ EmitComment();
+ OutStreamer->emitIntValueInHexWithPadding(ParaType, sizeof(ParaType));
+ }
+
+ // Traceback table offset.
+ OutStreamer->AddComment("Function size");
+ if (FirstHalfOfMandatoryField & TracebackTable::HasTraceBackTableOffsetMask) {
+ MCSymbol *FuncSectSym = getObjFileLowering().getFunctionEntryPointSymbol(
+ &(MF->getFunction()), TM);
+ OutStreamer->emitAbsoluteSymbolDiff(FuncEnd, FuncSectSym, 4);
+ }
+
+ // Since we unset the Int_Handler.
+ if (FirstHalfOfMandatoryField & TracebackTable::IsInterruptHandlerMask)
+ report_fatal_error("Hand_Mask not implement yet");
+
+ if (FirstHalfOfMandatoryField & TracebackTable::HasControlledStorageMask)
+ report_fatal_error("Ctl_Info not implement yet");
+
+ if (FirstHalfOfMandatoryField & TracebackTable::IsFunctionNamePresentMask) {
+ StringRef Name = MF->getName().substr(0, INT16_MAX);
+ int16_t NameLength = Name.size();
+ CommentOS << "Function name len = "
+ << static_cast<unsigned int>(NameLength);
+ EmitCommentAndValue(NameLength, 2);
+ OutStreamer->AddComment("Function Name");
+ OutStreamer->emitBytes(Name);
+ }
+
+ if (FirstHalfOfMandatoryField & TracebackTable::IsAllocaUsedMask) {
+ uint8_t AllocReg = XCOFF::AllocRegNo;
+ OutStreamer->AddComment("AllocaUsed");
+ OutStreamer->emitIntValueInHex(AllocReg, sizeof(AllocReg));
+ }
+
+ uint8_t ExtensionTableFlag = 0;
+ if (SecondHalfOfMandatoryField & TracebackTable::HasExtensionTableMask) {
+ if (ShouldEmitEHBlock)
+ ExtensionTableFlag |= ExtendedTBTableFlag::TB_EH_INFO;
+
+ CommentOS << "ExtensionTableFlag = "
+ << getExtendedTBTableFlagString(ExtensionTableFlag);
+ EmitCommentAndValue(ExtensionTableFlag, sizeof(ExtensionTableFlag));
+ }
+
+ if (ExtensionTableFlag & ExtendedTBTableFlag::TB_EH_INFO) {
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *EHInfoSym =
+ TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(MF);
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(EHInfoSym);
+ const MCSymbol *TOCBaseSym =
+ cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection())
+ ->getQualNameSymbol();
+ const MCExpr *Exp =
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCEntry, Ctx),
+ MCSymbolRefExpr::create(TOCBaseSym, Ctx), Ctx);
+
+ const DataLayout &DL = getDataLayout();
+ OutStreamer->emitValueToAlignment(4);
+ OutStreamer->AddComment("EHInfo Table");
+ OutStreamer->emitValue(Exp, DL.getPointerSize());
+ }
+
+#undef GENBOOLCOMMENT
+#undef GENVALUECOMMENT
+}
+
void PPCAIXAsmPrinter::ValidateGV(const GlobalVariable *GV) {
// Early error checking limiting what is supported.
if (GV->isThreadLocal())
report_fatal_error("Thread local not yet supported on AIX.");
- if (GV->hasSection())
- report_fatal_error("Custom section for Data not yet supported.");
-
if (GV->hasComdat())
report_fatal_error("COMDAT not yet supported by AIX.");
}
+static bool isSpecialLLVMGlobalArrayToSkip(const GlobalVariable *GV) {
+ return GV->hasAppendingLinkage() &&
+ StringSwitch<bool>(GV->getName())
+ // TODO: Linker could still eliminate the GV if we just skip
+ // handling llvm.used array. Skipping them for now until we or the
+ // AIX OS team come up with a good solution.
+ .Case("llvm.used", true)
+ // It's correct to just skip llvm.compiler.used array here.
+ .Case("llvm.compiler.used", true)
+ .Default(false);
+}
+
static bool isSpecialLLVMGlobalArrayForStaticInit(const GlobalVariable *GV) {
return StringSwitch<bool>(GV->getName())
.Cases("llvm.global_ctors", "llvm.global_dtors", true)
@@ -1669,19 +2042,15 @@ static bool isSpecialLLVMGlobalArrayForStaticInit(const GlobalVariable *GV) {
}
void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
- ValidateGV(GV);
-
- // TODO: Update the handling of global arrays for static init when we support
- // the ".ref" directive.
- // Otherwise, we can skip these arrays, because the AIX linker collects
- // static init functions simply based on their name.
- if (isSpecialLLVMGlobalArrayForStaticInit(GV))
+ // Special LLVM global arrays have been handled at the initialization.
+ if (isSpecialLLVMGlobalArrayToSkip(GV) || isSpecialLLVMGlobalArrayForStaticInit(GV))
return;
- // Create the symbol, set its storage class.
+ assert(!GV->getName().startswith("llvm.") &&
+ "Unhandled intrinsic global variable.");
+ ValidateGV(GV);
+
MCSymbolXCOFF *GVSym = cast<MCSymbolXCOFF>(getSymbol(GV));
- GVSym->setStorageClass(
- TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV));
if (GV->isDeclarationForLinker()) {
emitLinkage(GV, GVSym);
@@ -1705,10 +2074,12 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
if (GVKind.isCommon() || GVKind.isBSSLocal()) {
Align Alignment = GV->getAlign().getValueOr(DL.getPreferredAlign(GV));
uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
+ GVSym->setStorageClass(
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV));
if (GVKind.isBSSLocal())
OutStreamer->emitXCOFFLocalCommonSymbol(
- OutContext.getOrCreateSymbol(GVSym->getUnqualifiedName()), Size,
+ OutContext.getOrCreateSymbol(GVSym->getSymbolTableName()), Size,
GVSym, Alignment.value());
else
OutStreamer->emitCommonSymbol(GVSym, Size, Alignment.value());
@@ -1718,7 +2089,18 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
MCSymbol *EmittedInitSym = GVSym;
emitLinkage(GV, EmittedInitSym);
emitAlignment(getGVAlignment(GV, DL), GV);
- OutStreamer->emitLabel(EmittedInitSym);
+
+ // When -fdata-sections is enabled, every GlobalVariable will
+ // be put into its own csect; therefore, label is not necessary here.
+ if (!TM.getDataSections() || GV->hasSection()) {
+ OutStreamer->emitLabel(EmittedInitSym);
+ }
+
+ // Emit aliasing label for global variable.
+ llvm::for_each(GOAliasMap[GV], [this](const GlobalAlias *Alias) {
+ OutStreamer->emitLabel(getSymbol(Alias));
+ });
+
emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
}
@@ -1730,6 +2112,13 @@ void PPCAIXAsmPrinter::emitFunctionDescriptor() {
// Emit function descriptor.
OutStreamer->SwitchSection(
cast<MCSymbolXCOFF>(CurrentFnDescSym)->getRepresentedCsect());
+
+ // Emit aliasing label for function descriptor csect.
+ llvm::for_each(GOAliasMap[&MF->getFunction()],
+ [this](const GlobalAlias *Alias) {
+ OutStreamer->emitLabel(getSymbol(Alias));
+ });
+
// Emit function entry point address.
OutStreamer->emitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext),
PointerSize);
@@ -1745,6 +2134,20 @@ void PPCAIXAsmPrinter::emitFunctionDescriptor() {
OutStreamer->SwitchSection(Current.first, Current.second);
}
+void PPCAIXAsmPrinter::emitFunctionEntryLabel() {
+ // It's not necessary to emit the label when we have individual
+ // function in its own csect.
+ if (!TM.getFunctionSections())
+ PPCAsmPrinter::emitFunctionEntryLabel();
+
+ // Emit aliasing label for function entry point label.
+ llvm::for_each(
+ GOAliasMap[&MF->getFunction()], [this](const GlobalAlias *Alias) {
+ OutStreamer->emitLabel(
+ getObjFileLowering().getFunctionEntryPointSymbol(Alias, TM));
+ });
+}
+
void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
// If there are no functions in this module, we will never need to reference
// the TOC base.
@@ -1757,20 +2160,10 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
PPCTargetStreamer *TS =
static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
- const unsigned EntryByteSize = Subtarget->isPPC64() ? 8 : 4;
- const unsigned TOCEntriesByteSize = TOC.size() * EntryByteSize;
- // TODO: If TOC entries' size is larger than 32768, then we run out of
- // positive displacement to reach the TOC entry. We need to decide how to
- // handle entries' size larger than that later.
- if (TOCEntriesByteSize > 32767) {
- report_fatal_error("Handling of TOC entry displacement larger than 32767 "
- "is not yet implemented.");
- }
-
for (auto &I : TOC) {
// Setup the csect for the current TC entry.
MCSectionXCOFF *TCEntry = cast<MCSectionXCOFF>(
- getObjFileLowering().getSectionForTOCEntry(I.first));
+ getObjFileLowering().getSectionForTOCEntry(I.first, TM));
OutStreamer->SwitchSection(TCEntry);
OutStreamer->emitLabel(I.second);
@@ -1780,10 +2173,6 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
}
bool PPCAIXAsmPrinter::doInitialization(Module &M) {
- if (M.alias_size() > 0u)
- report_fatal_error(
- "module has aliases, which LLVM does not yet support for AIX");
-
const bool Result = PPCAsmPrinter::doInitialization(M);
auto setCsectAlignment = [this](const GlobalObject *GO) {
@@ -1803,19 +2192,174 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
// We need to know, up front, the alignment of csects for the assembly path,
// because once a .csect directive gets emitted, we could not change the
// alignment value on it.
- for (const auto &G : M.globals())
+ for (const auto &G : M.globals()) {
+ if (isSpecialLLVMGlobalArrayToSkip(&G))
+ continue;
+
+ if (isSpecialLLVMGlobalArrayForStaticInit(&G)) {
+ // Generate a format indicator and a unique module id to be a part of
+ // the sinit and sterm function names.
+ if (FormatIndicatorAndUniqueModId.empty()) {
+ std::string UniqueModuleId = getUniqueModuleId(&M);
+ if (UniqueModuleId != "")
+ // TODO: Use source file full path to generate the unique module id
+ // and add a format indicator as a part of function name in case we
+ // will support more than one format.
+ FormatIndicatorAndUniqueModId = "clang_" + UniqueModuleId.substr(1);
+ else
+ // Use the Pid and current time as the unique module id when we cannot
+ // generate one based on a module's strong external symbols.
+ // FIXME: Adjust the comment accordingly after we use source file full
+ // path instead.
+ FormatIndicatorAndUniqueModId =
+ "clangPidTime_" + llvm::itostr(sys::Process::getProcessId()) +
+ "_" + llvm::itostr(time(nullptr));
+ }
+
+ emitSpecialLLVMGlobal(&G);
+ continue;
+ }
+
setCsectAlignment(&G);
+ }
for (const auto &F : M)
setCsectAlignment(&F);
+ // Construct an aliasing list for each GlobalObject.
+ for (const auto &Alias : M.aliases()) {
+ const GlobalObject *Base = Alias.getBaseObject();
+ if (!Base)
+ report_fatal_error(
+ "alias without a base object is not yet supported on AIX");
+ GOAliasMap[Base].push_back(&Alias);
+ }
+
return Result;
}
-/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
-/// for a MachineFunction to the given output stream, in a format that the
-/// Darwin assembler can deal with.
-///
+void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case PPC::BL8:
+ case PPC::BL:
+ case PPC::BL8_NOP:
+ case PPC::BL_NOP: {
+ const MachineOperand &MO = MI->getOperand(0);
+ if (MO.isSymbol()) {
+ MCSymbolXCOFF *S =
+ cast<MCSymbolXCOFF>(OutContext.getOrCreateSymbol(MO.getSymbolName()));
+ ExtSymSDNodeSymbols.insert(S);
+ }
+ } break;
+ case PPC::BL_TLS:
+ case PPC::BL8_TLS:
+ case PPC::BL8_TLS_:
+ case PPC::BL8_NOP_TLS:
+ report_fatal_error("TLS call not yet implemented");
+ case PPC::TAILB:
+ case PPC::TAILB8:
+ case PPC::TAILBA:
+ case PPC::TAILBA8:
+ case PPC::TAILBCTR:
+ case PPC::TAILBCTR8:
+ if (MI->getOperand(0).isSymbol())
+ report_fatal_error("Tail call for extern symbol not yet supported.");
+ break;
+ }
+ return PPCAsmPrinter::emitInstruction(MI);
+}
+
+bool PPCAIXAsmPrinter::doFinalization(Module &M) {
+ for (MCSymbol *Sym : ExtSymSDNodeSymbols)
+ OutStreamer->emitSymbolAttribute(Sym, MCSA_Extern);
+ return PPCAsmPrinter::doFinalization(M);
+}
+
+static unsigned mapToSinitPriority(int P) {
+ if (P < 0 || P > 65535)
+ report_fatal_error("invalid init priority");
+
+ if (P <= 20)
+ return P;
+
+ if (P < 81)
+ return 20 + (P - 20) * 16;
+
+ if (P <= 1124)
+ return 1004 + (P - 81);
+
+ if (P < 64512)
+ return 2047 + (P - 1124) * 33878;
+
+ return 2147482625u + (P - 64512);
+}
+
+static std::string convertToSinitPriority(int Priority) {
+ // This helper function converts clang init priority to values used in sinit
+ // and sterm functions.
+ //
+ // The conversion strategies are:
+ // We map the reserved clang/gnu priority range [0, 100] into the sinit/sterm
+ // reserved priority range [0, 1023] by
+ // - directly mapping the first 21 and the last 20 elements of the ranges
+ // - linear interpolating the intermediate values with a step size of 16.
+ //
+ // We map the non reserved clang/gnu priority range of [101, 65535] into the
+ // sinit/sterm priority range [1024, 2147483648] by:
+ // - directly mapping the first and the last 1024 elements of the ranges
+ // - linear interpolating the intermediate values with a step size of 33878.
+ unsigned int P = mapToSinitPriority(Priority);
+
+ std::string PrioritySuffix;
+ llvm::raw_string_ostream os(PrioritySuffix);
+ os << llvm::format_hex_no_prefix(P, 8);
+ os.flush();
+ return PrioritySuffix;
+}
+
+void PPCAIXAsmPrinter::emitXXStructorList(const DataLayout &DL,
+ const Constant *List, bool IsCtor) {
+ SmallVector<Structor, 8> Structors;
+ preprocessXXStructorList(DL, List, Structors);
+ if (Structors.empty())
+ return;
+
+ unsigned Index = 0;
+ for (Structor &S : Structors) {
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(S.Func))
+ S.Func = CE->getOperand(0);
+
+ llvm::GlobalAlias::create(
+ GlobalValue::ExternalLinkage,
+ (IsCtor ? llvm::Twine("__sinit") : llvm::Twine("__sterm")) +
+ llvm::Twine(convertToSinitPriority(S.Priority)) +
+ llvm::Twine("_", FormatIndicatorAndUniqueModId) +
+ llvm::Twine("_", llvm::utostr(Index++)),
+ cast<Function>(S.Func));
+ }
+}
+
+void PPCAIXAsmPrinter::emitTTypeReference(const GlobalValue *GV,
+ unsigned Encoding) {
+ if (GV) {
+ MCSymbol *TypeInfoSym = TM.getSymbol(GV);
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(TypeInfoSym);
+ const MCSymbol *TOCBaseSym =
+ cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection())
+ ->getQualNameSymbol();
+ auto &Ctx = OutStreamer->getContext();
+ const MCExpr *Exp =
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCEntry, Ctx),
+ MCSymbolRefExpr::create(TOCBaseSym, Ctx), Ctx);
+ OutStreamer->emitValue(Exp, GetSizeOfEncodedValue(Encoding));
+ } else
+ OutStreamer->emitIntValue(0, GetSizeOfEncodedValue(Encoding));
+}
+
+// Return a pass that prints the PPC assembly code for a MachineFunction to the
+// given output stream.
static AsmPrinter *
createPPCAsmPrinterPass(TargetMachine &tm,
std::unique_ptr<MCStreamer> &&Streamer) {
@@ -1829,6 +2373,8 @@ createPPCAsmPrinterPass(TargetMachine &tm,
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCAsmPrinter() {
TargetRegistry::RegisterAsmPrinter(getThePPC32Target(),
createPPCAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(getThePPC32LETarget(),
+ createPPCAsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(getThePPC64Target(),
createPPCAsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(getThePPC64LETarget(),
diff --git a/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
index 2259a29f838a..3c6b1f84b821 100644
--- a/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
+++ b/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -59,7 +59,7 @@ using namespace llvm;
namespace {
-#define DEBUG_TYPE "bool-ret-to-int"
+#define DEBUG_TYPE "ppc-bool-ret-to-int"
STATISTIC(NumBoolRetPromotion,
"Number of times a bool feeding a RetInst was promoted to an int");
@@ -75,12 +75,11 @@ class PPCBoolRetToInt : public FunctionPass {
WorkList.push_back(V);
Defs.insert(V);
while (!WorkList.empty()) {
- Value *Curr = WorkList.back();
- WorkList.pop_back();
+ Value *Curr = WorkList.pop_back_val();
auto *CurrUser = dyn_cast<User>(Curr);
- // Operands of CallInst are skipped because they may not be Bool type,
- // and their positions are defined by ABI.
- if (CurrUser && !isa<CallInst>(Curr))
+ // Operands of CallInst/Constant are skipped because they may not be Bool
+ // type. For CallInst, their positions are defined by ABI.
+ if (CurrUser && !isa<CallInst>(Curr) && !isa<Constant>(Curr))
for (auto &Op : CurrUser->operands())
if (Defs.insert(Op).second)
WorkList.push_back(Op);
@@ -90,6 +89,9 @@ class PPCBoolRetToInt : public FunctionPass {
// Translate a i1 value to an equivalent i32/i64 value:
Value *translate(Value *V) {
+ assert(V->getType() == Type::getInt1Ty(V->getContext()) &&
+ "Expect an i1 value");
+
Type *IntTy = ST->isPPC64() ? Type::getInt64Ty(V->getContext())
: Type::getInt32Ty(V->getContext());
@@ -252,9 +254,9 @@ class PPCBoolRetToInt : public FunctionPass {
auto *First = dyn_cast<User>(Pair.first);
auto *Second = dyn_cast<User>(Pair.second);
assert((!First || Second) && "translated from user to non-user!?");
- // Operands of CallInst are skipped because they may not be Bool type,
- // and their positions are defined by ABI.
- if (First && !isa<CallInst>(First))
+ // Operands of CallInst/Constant are skipped because they may not be Bool
+ // type. For CallInst, their positions are defined by ABI.
+ if (First && !isa<CallInst>(First) && !isa<Constant>(First))
for (unsigned i = 0; i < First->getNumOperands(); ++i)
Second->setOperand(i, BoolToIntMap[First->getOperand(i)]);
}
@@ -280,8 +282,8 @@ private:
} // end anonymous namespace
char PPCBoolRetToInt::ID = 0;
-INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int",
- "Convert i1 constants to i32/i64 if they are returned",
- false, false)
+INITIALIZE_PASS(PPCBoolRetToInt, "ppc-bool-ret-to-int",
+ "Convert i1 constants to i32/i64 if they are returned", false,
+ false)
FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); }
diff --git a/llvm/lib/Target/PowerPC/PPCCCState.cpp b/llvm/lib/Target/PowerPC/PPCCCState.cpp
index 5116f0d121f4..79ffc6627a61 100644
--- a/llvm/lib/Target/PowerPC/PPCCCState.cpp
+++ b/llvm/lib/Target/PowerPC/PPCCCState.cpp
@@ -32,4 +32,4 @@ void PPCCCState::PreAnalyzeFormalArguments(
OriginalArgWasPPCF128.push_back(false);
}
}
-} \ No newline at end of file
+}
diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index bb12e05173a6..b9518d6d7064 100644
--- a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -1,4 +1,4 @@
-//===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
+//===-- PPCCTRLoops.cpp - Verify CTR loops -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,74 +6,48 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass identifies loops where we can generate the PPC branch instructions
-// that decrement and test the count register (CTR) (bdnz and friends).
-//
-// The pattern that defines the induction variable can changed depending on
-// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
-// normalizes induction variables, and the Loop Strength Reduction pass
-// run by 'llc' may also make changes to the induction variable.
-//
-// Criteria for CTR loops:
-// - Countable loops (w/ ind. var for a trip count)
-// - Try inner-most loops first
-// - No nested CTR loops.
-// - No function calls in loops.
+// This pass verifies that all bdnz/bdz instructions are dominated by a loop
+// mtctr before any other instructions that might clobber the ctr register.
//
//===----------------------------------------------------------------------===//
+// CTR loops are produced by the HardwareLoops pass and this pass is simply a
+// verification that no invalid CTR loops are produced. As such, it isn't
+// something that needs to be run (or even defined) for Release builds so the
+// entire file is guarded by NDEBUG.
+#ifndef NDEBUG
+#include <vector>
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPC.h"
-#include "PPCSubtarget.h"
-#include "PPCTargetMachine.h"
-#include "PPCTargetTransformInfo.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/ValueHandle.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ilist_iterator.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GenericDomTreeConstruction.h"
+#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
-
-#ifndef NDEBUG
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#endif
using namespace llvm;
-#define DEBUG_TYPE "ctrloops"
-
-#ifndef NDEBUG
-static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
-#endif
+#define DEBUG_TYPE "ppc-ctrloops-verify"
namespace {
-#ifndef NDEBUG
struct PPCCTRLoopsVerify : public MachineFunctionPass {
public:
static char ID;
@@ -94,10 +68,8 @@ namespace {
};
char PPCCTRLoopsVerify::ID = 0;
-#endif // NDEBUG
} // end anonymous namespace
-#ifndef NDEBUG
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
"PowerPC CTR Loops Verify", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
@@ -107,9 +79,7 @@ INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
FunctionPass *llvm::createPPCCTRLoopsVerify() {
return new PPCCTRLoopsVerify();
}
-#endif // NDEBUG
-#ifndef NDEBUG
static bool clobbersCTR(const MachineInstr &MI) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
@@ -178,9 +148,7 @@ queue_preds:
return false;
}
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PIE = MBB->pred_end(); PI != PIE; ++PI)
- Preds.push_back(*PI);
+ append_range(Preds, MBB->predecessors());
}
do {
diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 1eaa7f7a44b3..cc3486718179 100644
--- a/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -59,10 +59,7 @@ def RetCC_PPC_Cold : CallingConv<[
CCIfType<[f32], CCAssignToReg<[F1]>>,
CCIfType<[f64], CCAssignToReg<[F1]>>,
- CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2]>>>,
-
- CCIfType<[v4f64, v4f32, v4i1],
- CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1]>>>,
+ CCIfType<[f128], CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2]>>>,
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()",
@@ -95,13 +92,9 @@ def RetCC_PPC : CallingConv<[
// For P9, f128 are passed in vector registers.
CCIfType<[f128],
- CCIfSubtarget<"hasP9Vector()",
+ CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
- // QPX vectors are returned in QF1 and QF2.
- CCIfType<[v4f64, v4f32, v4i1],
- CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
-
// Vector types returned as "direct" go into V2 .. V9; note that only the
// ELFv2 ABI fully utilizes all these registers.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
@@ -156,10 +149,8 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[f128],
- CCIfSubtarget<"hasP9Vector()",
+ CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
- CCIfType<[v4f64, v4f32, v4i1],
- CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>
@@ -223,12 +214,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[
CCIfType<[f32], CCIfSubtarget<"hasSPE()", CCAssignToStack<4, 4>>>,
CCIfType<[f64], CCIfSubtarget<"hasSPE()", CCAssignToStack<8, 8>>>,
- // QPX vectors that are stored in double precision need 32-byte alignment.
- CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>,
-
// Vectors and float128 get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>,
- CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToStack<16, 16>>>
+ CCIfType<[f128], CCIfSubtarget<"hasAltivec()", CCAssignToStack<16, 16>>>
]>;
// This calling convention puts vector arguments always on the stack. It is used
@@ -243,10 +231,6 @@ def CC_PPC32_SVR4_VarArg : CallingConv<[
// put vector arguments in vector registers before putting them on the stack.
let Entry = 1 in
def CC_PPC32_SVR4 : CallingConv<[
- // QPX vectors mirror the scalar FP convention.
- CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
- CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
-
// The first 12 Vector arguments are passed in AltiVec registers.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
@@ -254,7 +238,7 @@ def CC_PPC32_SVR4 : CallingConv<[
// Float128 types treated as vector arguments.
CCIfType<[f128],
- CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
+ CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
V8, V9, V10, V11, V12, V13]>>>,
CCDelegateTo<CC_PPC32_SVR4_Common>
@@ -307,6 +291,8 @@ def CSR_AIX32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
F27, F28, F29, F30, F31, CR2, CR3, CR4
)>;
+def CSR_AIX32_Altivec : CalleeSavedRegs<(add CSR_AIX32, CSR_Altivec)>;
+
// Common CalleeSavedRegs for SVR4 and AIX.
def CSR_PPC64 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
X21, X22, X23, X24, X25, X26, X27, X28,
diff --git a/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
index c9f74bbf861c..08b7bdb3ac1e 100644
--- a/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -77,8 +77,9 @@ protected:
if (J->getOperand(0).getMBB() == &ReturnMBB) {
// This is an unconditional branch to the return. Replace the
// branch with a blr.
- BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode()))
- .copyImplicitOps(*I);
+ MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I);
+ (*PI)->insert(J, MI);
+
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
BlockChanged = true;
@@ -89,10 +90,13 @@ protected:
if (J->getOperand(2).getMBB() == &ReturnMBB) {
// This is a conditional branch to the return. Replace the branch
// with a bclr.
- BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR))
+ MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I);
+ MI->setDesc(TII->get(PPC::BCCLR));
+ MachineInstrBuilder(*ReturnMBB.getParent(), MI)
.add(J->getOperand(0))
- .add(J->getOperand(1))
- .copyImplicitOps(*I);
+ .add(J->getOperand(1));
+ (*PI)->insert(J, MI);
+
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
BlockChanged = true;
@@ -103,11 +107,13 @@ protected:
if (J->getOperand(1).getMBB() == &ReturnMBB) {
// This is a conditional branch to the return. Replace the branch
// with a bclr.
- BuildMI(
- **PI, J, J->getDebugLoc(),
- TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn))
- .add(J->getOperand(0))
- .copyImplicitOps(*I);
+ MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I);
+ MI->setDesc(
+ TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn));
+ MachineInstrBuilder(*ReturnMBB.getParent(), MI)
+ .add(J->getOperand(0));
+ (*PI)->insert(J, MI);
+
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
BlockChanged = true;
diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 39790ac9a8aa..c181816e31c6 100644
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -86,7 +86,6 @@ typedef struct Address {
class PPCFastISel final : public FastISel {
const TargetMachine &TM;
- const PPCSubtarget *PPCSubTarget;
const PPCSubtarget *Subtarget;
PPCFunctionInfo *PPCFuncInfo;
const TargetInstrInfo &TII;
@@ -97,7 +96,6 @@ class PPCFastISel final : public FastISel {
explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo)
: FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
- PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
@@ -1567,6 +1565,10 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (IsVarArg)
return false;
+ // If this is a PC-Rel function, let SDISel handle the call.
+ if (Subtarget->isUsingPCRelativeCalls())
+ return false;
+
// Handle simple calls for now, with legal return types and
// those that can be extended.
Type *RetTy = CLI.RetTy;
@@ -1622,7 +1624,10 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
return false;
- if (ArgVT.isVector())
+ // FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP
+ // types, which is passed through vector register. Skip these types and
+ // fallback to default SelectionDAG based selection.
+ if (ArgVT.isVector() || ArgVT == MVT::f128)
return false;
unsigned Arg = getRegForValue(ArgValue);
@@ -1991,6 +1996,10 @@ bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
// Materialize a floating-point constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
+ // If this is a PC-Rel function, let SDISel handle constant pool.
+ if (Subtarget->isUsingPCRelativeCalls())
+ return false;
+
// No plans to handle long double here.
if (VT != MVT::f32 && VT != MVT::f64)
return 0;
@@ -2055,6 +2064,10 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
// Materialize the address of a global value into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
+ // If this is a PC-Rel function, let SDISel handle GV materialization.
+ if (Subtarget->isUsingPCRelativeCalls())
+ return false;
+
assert(VT == MVT::i64 && "Non-address!");
const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
unsigned DestReg = createResultReg(RC);
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index bd9174c1973d..50ce11b8374f 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -39,15 +39,6 @@ EnablePEVectorSpills("ppc-enable-pe-vector-spills",
cl::desc("Enable spills in prologue to vector registers."),
cl::init(false), cl::Hidden);
-/// VRRegNo - Map from a numbered VR register to its enum value.
-///
-static const MCPhysReg VRRegNo[] = {
- PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
- PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-
static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
if (STI.isAIXABI())
return STI.isPPC64() ? 16 : 8;
@@ -227,19 +218,14 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
CALLEE_SAVED_VRS
};
- static const SpillSlot AIXOffsets32[] = {
- CALLEE_SAVED_FPRS,
- CALLEE_SAVED_GPRS32,
- // Add AIX's extra CSR.
- {PPC::R13, -76},
- // TODO: Update when we add vector support for AIX.
- };
+ static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
+ CALLEE_SAVED_GPRS32,
+ // Add AIX's extra CSR.
+ {PPC::R13, -76},
+ CALLEE_SAVED_VRS};
static const SpillSlot AIXOffsets64[] = {
- CALLEE_SAVED_FPRS,
- CALLEE_SAVED_GPRS64,
- // TODO: Update when we add vector support for AIX.
- };
+ CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};
if (Subtarget.is64BitELFABI()) {
NumEntries = array_lengthof(ELFOffsets64);
@@ -262,153 +248,11 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
return AIXOffsets32;
}
-/// RemoveVRSaveCode - We have found that this function does not need any code
-/// to manipulate the VRSAVE register, even though it uses vector registers.
-/// This can happen when the only registers used are known to be live in or out
-/// of the function. Remove all of the VRSAVE related code from the function.
-/// FIXME: The removal of the code results in a compile failure at -O0 when the
-/// function contains a function call, as the GPR containing original VRSAVE
-/// contents is spilled and reloaded around the call. Without the prolog code,
-/// the spill instruction refers to an undefined register. This code needs
-/// to account for all uses of that GPR.
-static void RemoveVRSaveCode(MachineInstr &MI) {
- MachineBasicBlock *Entry = MI.getParent();
- MachineFunction *MF = Entry->getParent();
-
- // We know that the MTVRSAVE instruction immediately follows MI. Remove it.
- MachineBasicBlock::iterator MBBI = MI;
- ++MBBI;
- assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
- MBBI->eraseFromParent();
-
- bool RemovedAllMTVRSAVEs = true;
- // See if we can find and remove the MTVRSAVE instruction from all of the
- // epilog blocks.
- for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
- // If last instruction is a return instruction, add an epilogue
- if (I->isReturnBlock()) {
- bool FoundIt = false;
- for (MBBI = I->end(); MBBI != I->begin(); ) {
- --MBBI;
- if (MBBI->getOpcode() == PPC::MTVRSAVE) {
- MBBI->eraseFromParent(); // remove it.
- FoundIt = true;
- break;
- }
- }
- RemovedAllMTVRSAVEs &= FoundIt;
- }
- }
-
- // If we found and removed all MTVRSAVE instructions, remove the read of
- // VRSAVE as well.
- if (RemovedAllMTVRSAVEs) {
- MBBI = MI;
- assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
- --MBBI;
- assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
- MBBI->eraseFromParent();
- }
-
- // Finally, nuke the UPDATE_VRSAVE.
- MI.eraseFromParent();
-}
-
-// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
-// instruction selector. Based on the vector registers that have been used,
-// transform this into the appropriate ORI instruction.
-static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
- MachineFunction *MF = MI.getParent()->getParent();
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- DebugLoc dl = MI.getDebugLoc();
-
- const MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned UsedRegMask = 0;
- for (unsigned i = 0; i != 32; ++i)
- if (MRI.isPhysRegModified(VRRegNo[i]))
- UsedRegMask |= 1 << (31-i);
-
- // Live in and live out values already must be in the mask, so don't bother
- // marking them.
- for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
- unsigned RegNo = TRI->getEncodingValue(LI.first);
- if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg.
- UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
- }
-
- // Live out registers appear as use operands on return instructions.
- for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
- UsedRegMask != 0 && BI != BE; ++BI) {
- const MachineBasicBlock &MBB = *BI;
- if (!MBB.isReturnBlock())
- continue;
- const MachineInstr &Ret = MBB.back();
- for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = Ret.getOperand(I);
- if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
- continue;
- unsigned RegNo = TRI->getEncodingValue(MO.getReg());
- UsedRegMask &= ~(1 << (31-RegNo));
- }
- }
-
- // If no registers are used, turn this into a copy.
- if (UsedRegMask == 0) {
- // Remove all VRSAVE code.
- RemoveVRSaveCode(MI);
- return;
- }
-
- Register SrcReg = MI.getOperand(1).getReg();
- Register DstReg = MI.getOperand(0).getReg();
-
- if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
- if (DstReg != SrcReg)
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
- .addReg(SrcReg)
- .addImm(UsedRegMask);
- else
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addImm(UsedRegMask);
- } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
- if (DstReg != SrcReg)
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg)
- .addImm(UsedRegMask >> 16);
- else
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addImm(UsedRegMask >> 16);
- } else {
- if (DstReg != SrcReg)
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg)
- .addImm(UsedRegMask >> 16);
- else
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addImm(UsedRegMask >> 16);
-
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
- .addReg(DstReg, RegState::Kill)
- .addImm(UsedRegMask & 0xFFFF);
- }
-
- // Remove the old UPDATE_VRSAVE instruction.
- MI.eraseFromParent();
-}
-
static bool spillsCR(const MachineFunction &MF) {
const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
return FuncInfo->isCRSpilled();
}
-static bool spillsVRSAVE(const MachineFunction &MF) {
- const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
- return FuncInfo->isVRSAVESpilled();
-}
-
static bool hasSpills(const MachineFunction &MF) {
const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
return FuncInfo->hasSpills();
@@ -474,7 +318,7 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
!FI->mustSaveTOC() && // No need to save TOC.
!RegInfo->hasBasePointer(MF); // No special alignment.
- // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
+ // Note: for PPC32 SVR4ABI, we can still generate stackless
// code if all local vars are reg-allocated.
bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
@@ -531,9 +375,10 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
return false;
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
- MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
- (MF.getTarget().Options.GuaranteedTailCallOpt &&
- MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+ MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
+ MF.exposesReturnsTwice() ||
+ (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
@@ -681,6 +526,8 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
// register is available, we can adjust for that by not overlapping the spill
// code. However, if we need to realign the stack (i.e. have a base pointer)
// and the stack frame is large, we need two scratch registers.
+// Also, stack probe requires two scratch registers, one for old sp, one for
+// large frame and large probe size.
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -692,8 +539,10 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
Align MaxAlign = MFI.getMaxAlign();
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
+ const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
- return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
+ return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
+ TLI.hasInlineStackProbe(MF);
}
bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
@@ -736,8 +585,8 @@ bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
// Frame pointers and base pointers complicate matters so don't do anything
// if we have them. For example having a frame pointer will sometimes require
// a copy of r1 into r31 and that makes keeping track of updates to r1 more
- // difficult.
- if (hasFP(MF) || RegInfo->hasBasePointer(MF))
+ // difficult. Similar situation exists with setjmp.
+ if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
return false;
// Calls to fast_cc functions use different rules for passing parameters on
@@ -771,24 +620,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
bool isPPC64 = Subtarget.isPPC64();
// Get the ABI.
bool isSVR4ABI = Subtarget.isSVR4ABI();
- bool isAIXABI = Subtarget.isAIXABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
- assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");
-
- // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
- // process it.
- if (!isSVR4ABI)
- for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
- if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
- if (isAIXABI)
- report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
- HandleVRSaveUpdate(*MBBI, TII);
- break;
- }
- }
-
- // Move MBBI back to the beginning of the prologue block.
- MBBI = MBB.begin();
+ assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
// Work out frame sizes.
unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
@@ -848,12 +681,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
"FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
// Using the same bool variable as below to suppress compiler warnings.
- // Stack probe requires two scratch registers, one for old sp, one for large
- // frame and large probe size.
bool SingleScratchReg = findScratchRegister(
- &MBB, false,
- twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF),
- &ScratchReg, &TempReg);
+ &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
assert(SingleScratchReg &&
"Required number of registers not available in this block");
@@ -863,26 +692,18 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
int FPOffset = 0;
if (HasFP) {
- if (isSVR4ABI) {
- MachineFrameInfo &MFI = MF.getFrameInfo();
- int FPIndex = FI->getFramePointerSaveIndex();
- assert(FPIndex && "No Frame Pointer Save Slot!");
- FPOffset = MFI.getObjectOffset(FPIndex);
- } else {
- FPOffset = getFramePointerSaveOffset();
- }
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = MFI.getObjectOffset(FPIndex);
}
int BPOffset = 0;
if (HasBP) {
- if (isSVR4ABI) {
- MachineFrameInfo &MFI = MF.getFrameInfo();
- int BPIndex = FI->getBasePointerSaveIndex();
- assert(BPIndex && "No Base Pointer Save Slot!");
- BPOffset = MFI.getObjectOffset(BPIndex);
- } else {
- BPOffset = getBasePointerSaveOffset();
- }
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ int BPIndex = FI->getBasePointerSaveIndex();
+ assert(BPIndex && "No Base Pointer Save Slot!");
+ BPOffset = MFI.getObjectOffset(BPIndex);
}
int PBPOffset = 0;
@@ -1382,10 +1203,12 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
if (StackAllocMIPos == PrologMBB.end())
return;
const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
+ MachineBasicBlock *CurrentMBB = &PrologMBB;
DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
MachineInstr &MI = *StackAllocMIPos;
int64_t NegFrameSize = MI.getOperand(2).getImm();
- int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF);
+ unsigned ProbeSize = TLI.getStackProbeSize(MF);
+ int64_t NegProbeSize = -(int64_t)ProbeSize;
assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
int64_t NumBlocks = NegFrameSize / NegProbeSize;
int64_t NegResidualSize = NegFrameSize % NegProbeSize;
@@ -1394,10 +1217,9 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
Register FPReg = MI.getOperand(1).getReg();
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
bool HasBP = RegInfo->hasBasePointer(MF);
+ Register BPReg = RegInfo->getBaseRegister(MF);
Align MaxAlign = MFI.getMaxAlign();
- // Initialize current frame pointer.
const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
- BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
// Subroutines to generate .cfi_* directives.
auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, Register Reg) {
@@ -1437,90 +1259,218 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
// Subroutine to store frame pointer and decrease stack pointer by probe size.
auto allocateAndProbe = [&](MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, int64_t NegSize,
- Register NegSizeReg, bool UseDForm) {
+ Register NegSizeReg, bool UseDForm,
+ Register StoreReg) {
if (UseDForm)
BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
- .addReg(FPReg)
+ .addReg(StoreReg)
.addImm(NegSize)
.addReg(SPReg);
else
BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
- .addReg(FPReg)
+ .addReg(StoreReg)
.addReg(SPReg)
.addReg(NegSizeReg);
};
- // Use FPReg to calculate CFA.
- if (needsCFI)
- buildDefCFA(PrologMBB, {MI}, FPReg, 0);
- // For case HasBP && MaxAlign > 1, we have to align the SP by performing
+ // Used to probe realignment gap [stackptr - (stackptr % align), stackptr)
+ // when HasBP && isPPC64. In such scenario, normally we have r0, r1, r12, r30
+ // available and r1 is already copied to r30 which is BPReg. So BPReg stores
+ // the value of stackptr.
+ // First we have to probe tail interval whose size is less than probesize,
+ // i.e., [stackptr - (stackptr % align) % probesize, stackptr). At this stage,
+ // ScratchReg stores the value of ((stackptr % align) % probesize). Then we
+ // probe each block sized probesize until stackptr meets
+ // (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized
+ // as negprobesize. At both stages, TempReg stores the value of
+ // (stackptr - (stackptr % align)).
+ auto dynamicProbe = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, Register ScratchReg,
+ Register TempReg) {
+ assert(HasBP && isPPC64 && "Probe alignment part not available");
+ assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
+ // ScratchReg = stackptr % align
+ BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
+ .addReg(BPReg)
+ .addImm(0)
+ .addImm(64 - Log2(MaxAlign));
+ // TempReg = stackptr - (stackptr % align)
+ BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg)
+ .addReg(ScratchReg)
+ .addReg(BPReg);
+ // ScratchReg = (stackptr % align) % probesize
+ BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0)
+ .addImm(64 - Log2(ProbeSize));
+ Register CRReg = PPC::CR0;
+ // If (stackptr % align) % probesize == 0, we should not generate probe
+ // code. Layout of output assembly kinda like:
+ // bb.0:
+ // ...
+ // cmpldi $scratchreg, 0
+ // beq bb.2
+ // bb.1: # Probe tail interval
+ // neg $scratchreg, $scratchreg
+ // stdux $bpreg, r1, $scratchreg
+ // bb.2:
+ // <materialize negprobesize into $scratchreg>
+ // cmpd r1, $tempreg
+ // beq bb.4
+ // bb.3: # Loop to probe each block
+ // stdux $bpreg, r1, $scratchreg
+ // cmpd r1, $tempreg
+ // bne bb.3
+ // bb.4:
+ // ...
+ MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+ MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, ProbeResidualMBB);
+ MachineBasicBlock *ProbeLoopPreHeaderMBB =
+ MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB);
+ MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
+ MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, ProbeExitMBB);
+ // bb.4
+ ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
+ ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ // bb.0
+ BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0);
+ BuildMI(&MBB, DL, TII.get(PPC::BCC))
+ .addImm(PPC::PRED_EQ)
+ .addReg(CRReg)
+ .addMBB(ProbeLoopPreHeaderMBB);
+ MBB.addSuccessor(ProbeResidualMBB);
+ MBB.addSuccessor(ProbeLoopPreHeaderMBB);
+ // bb.1
+ BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg)
+ .addReg(ScratchReg);
+ allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg,
+ false, BPReg);
+ ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB);
+ // bb.2
+ MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(),
+ NegProbeSize, ScratchReg);
+ BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg)
+ .addReg(SPReg)
+ .addReg(TempReg);
+ BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC))
+ .addImm(PPC::PRED_EQ)
+ .addReg(CRReg)
+ .addMBB(ProbeExitMBB);
+ ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB);
+ ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB);
+ // bb.3
+ allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg,
+ false, BPReg);
+ BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg)
+ .addReg(SPReg)
+ .addReg(TempReg);
+ BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
+ .addImm(PPC::PRED_NE)
+ .addReg(CRReg)
+ .addMBB(ProbeLoopBodyMBB);
+ ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
+ ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
+ // Update liveins.
+ recomputeLiveIns(*ProbeResidualMBB);
+ recomputeLiveIns(*ProbeLoopPreHeaderMBB);
+ recomputeLiveIns(*ProbeLoopBodyMBB);
+ recomputeLiveIns(*ProbeExitMBB);
+ return ProbeExitMBB;
+ };
+ // For case HasBP && MaxAlign > 1, we have to realign the SP by performing
// SP = SP - SP % MaxAlign.
if (HasBP && MaxAlign > 1) {
- if (isPPC64)
- BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
- .addReg(FPReg)
- .addImm(0)
- .addImm(64 - Log2(MaxAlign));
- else
- BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
+ // FIXME: Currently only probe the gap [stackptr & alignmask, stackptr) in
+ // 64-bit mode.
+ if (isPPC64) {
+ // Use BPReg to calculate CFA.
+ if (needsCFI)
+ buildDefCFA(*CurrentMBB, {MI}, BPReg, 0);
+ // Since we have SPReg copied to BPReg at the moment, FPReg can be used as
+ // TempReg.
+ Register TempReg = FPReg;
+ CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg);
+ // Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64.
+ BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
+ .addReg(BPReg)
+ .addReg(BPReg);
+ } else {
+ // Initialize current frame pointer.
+ BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
+ .addReg(SPReg)
+ .addReg(SPReg);
+ // Use FPReg to calculate CFA.
+ if (needsCFI)
+ buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
.addReg(FPReg)
.addImm(0)
.addImm(32 - Log2(MaxAlign))
.addImm(31);
- BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX),
- SPReg)
- .addReg(FPReg)
- .addReg(SPReg)
- .addReg(ScratchReg);
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg)
+ .addReg(ScratchReg)
+ .addReg(SPReg);
+ }
+ } else {
+ // Initialize current frame pointer.
+ BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
+ // Use FPReg to calculate CFA.
+ if (needsCFI)
+ buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
}
// Probe residual part.
if (NegResidualSize) {
bool ResidualUseDForm = CanUseDForm(NegResidualSize);
if (!ResidualUseDForm)
- MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg);
- allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg,
- ResidualUseDForm);
+ MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
+ allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
+ ResidualUseDForm, FPReg);
}
bool UseDForm = CanUseDForm(NegProbeSize);
// If number of blocks is small, just probe them directly.
if (NumBlocks < 3) {
if (!UseDForm)
- MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
+ MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
for (int i = 0; i < NumBlocks; ++i)
- allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm);
+ allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
+ FPReg);
if (needsCFI) {
// Restore using SPReg to calculate CFA.
- buildDefCFAReg(PrologMBB, {MI}, SPReg);
+ buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
}
} else {
// Since CTR is a volatile register and current shrinkwrap implementation
// won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
// CTR loop to probe.
// Calculate trip count and stores it in CTRReg.
- MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg);
- BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
+ MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
.addReg(ScratchReg, RegState::Kill);
if (!UseDForm)
- MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
+ MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
// Create MBBs of the loop.
MachineFunction::iterator MBBInsertPoint =
- std::next(PrologMBB.getIterator());
+ std::next(CurrentMBB->getIterator());
MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, LoopMBB);
MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, ExitMBB);
// Synthesize the loop body.
allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
- UseDForm);
+ UseDForm, FPReg);
BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
.addMBB(LoopMBB);
LoopMBB->addSuccessor(ExitMBB);
LoopMBB->addSuccessor(LoopMBB);
// Synthesize the exit MBB.
- ExitMBB->splice(ExitMBB->end(), &PrologMBB,
+ ExitMBB->splice(ExitMBB->end(), CurrentMBB,
std::next(MachineBasicBlock::iterator(MI)),
- PrologMBB.end());
- ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB);
- PrologMBB.addSuccessor(LoopMBB);
+ CurrentMBB->end());
+ ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
+ CurrentMBB->addSuccessor(LoopMBB);
if (needsCFI) {
// Restore using SPReg to calculate CFA.
buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
@@ -1552,8 +1502,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
- // Get the ABI.
- bool isSVR4ABI = Subtarget.isSVR4ABI();
// Check if the link register (LR) has been saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -1601,24 +1549,16 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
SingleScratchReg = ScratchReg == TempReg;
if (HasFP) {
- if (isSVR4ABI) {
- int FPIndex = FI->getFramePointerSaveIndex();
- assert(FPIndex && "No Frame Pointer Save Slot!");
- FPOffset = MFI.getObjectOffset(FPIndex);
- } else {
- FPOffset = getFramePointerSaveOffset();
- }
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = MFI.getObjectOffset(FPIndex);
}
int BPOffset = 0;
if (HasBP) {
- if (isSVR4ABI) {
int BPIndex = FI->getBasePointerSaveIndex();
assert(BPIndex && "No Base Pointer Save Slot!");
BPOffset = MFI.getObjectOffset(BPIndex);
- } else {
- BPOffset = getBasePointerSaveOffset();
- }
}
int PBPOffset = 0;
@@ -1704,11 +1644,18 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
// zone add this offset back now.
+ // If the function has a base pointer, the stack pointer has been copied
+ // to it so we can restore it by copying in the other direction.
+ if (HasRedZone && HasBP) {
+ BuildMI(MBB, MBBI, dl, OrInst, RBReg).
+ addReg(BPReg).
+ addReg(BPReg);
+ }
// If this function contained a fastcc call and GuaranteedTailCallOpt is
// enabled (=> hasFastCall()==true) the fastcc call might contain a tail
// call which invalidates the stack pointer value in SP(0). So we use the
- // value of R31 in this case.
- if (FI->hasFastCall()) {
+ // value of R31 in this case. Similar situation exists with setjmp.
+ else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
assert(HasFP && "Expecting a valid frame pointer.");
if (!HasRedZone)
RBReg = FPReg;
@@ -2054,7 +2001,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
bool HasGPSaveArea = false;
bool HasG8SaveArea = false;
bool HasFPSaveArea = false;
- bool HasVRSAVESaveArea = false;
bool HasVRSaveArea = false;
SmallVector<CalleeSavedInfo, 18> GPRegs;
@@ -2094,8 +2040,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
} else if (PPC::CRBITRCRegClass.contains(Reg) ||
PPC::CRRCRegClass.contains(Reg)) {
; // do nothing, as we already know whether CRs are spilled
- } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
- HasVRSAVESaveArea = true;
} else if (PPC::VRRCRegClass.contains(Reg) ||
PPC::SPERCRegClass.contains(Reg)) {
// Altivec and SPE are mutually exclusive, but have the same stack
@@ -2218,23 +2162,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
LowerBound -= 4; // The CR save area is always 4 bytes long.
}
- if (HasVRSAVESaveArea) {
- // FIXME SVR4: Is it actually possible to have multiple elements in CSI
- // which have the VRSAVE register class?
- // Adjust the frame index of the VRSAVE spill slot.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
-
- if (PPC::VRSAVERCRegClass.contains(Reg)) {
- int FI = CSI[i].getFrameIdx();
-
- MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
- }
- }
-
- LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
- }
-
// Both Altivec and SPE have the same alignment and padding requirements
// within the stack frame.
if (HasVRSaveArea) {
@@ -2274,8 +2201,8 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
// needed alignment padding.
unsigned StackSize = determineFrameLayout(MF, true);
MachineFrameInfo &MFI = MF.getFrameInfo();
- if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
- hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
+ if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
+ (hasSpills(MF) && !isInt<16>(StackSize))) {
const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
@@ -2289,7 +2216,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
// These kinds of spills might need two registers.
- if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
+ if (spillsCR(MF) || HasAlVars)
RS->addScavengingFrameIndex(
MFI.CreateStackObject(Size, Alignment, false));
}
@@ -2366,9 +2293,6 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
- if (Reg == PPC::VRSAVE)
- continue;
// CR2 through CR4 are the nonvolatile CR fields.
bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
@@ -2533,10 +2457,6 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters(
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
- if (Reg == PPC::VRSAVE)
- continue;
-
if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
continue;
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 8ffd89ef5ccd..693b0adaede4 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -43,6 +43,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
@@ -138,7 +139,6 @@ namespace {
///
class PPCDAGToDAGISel : public SelectionDAGISel {
const PPCTargetMachine &TM;
- const PPCSubtarget *PPCSubTarget = nullptr;
const PPCSubtarget *Subtarget = nullptr;
const PPCTargetLowering *PPCLowering = nullptr;
unsigned GlobalBaseReg = 0;
@@ -150,14 +150,10 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
- PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
Subtarget = &MF.getSubtarget<PPCSubtarget>();
PPCLowering = Subtarget->getTargetLowering();
SelectionDAGISel::runOnMachineFunction(MF);
- if (!Subtarget->isSVR4ABI())
- InsertVRSaveCode(MF);
-
return true;
}
@@ -218,7 +214,7 @@ namespace {
/// SelectCC - Select a comparison of the specified values with the
/// specified condition code, returning the CR# of the expression.
SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- const SDLoc &dl);
+ const SDLoc &dl, SDValue Chain = SDValue());
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
/// immediate field. Note that the operand at this point is already the
@@ -295,6 +291,13 @@ namespace {
Align(16));
}
+ /// SelectAddrImmX34 - Returns true if the address N can be represented by
+ /// a base register plus a signed 34-bit displacement. Suitable for use by
+ /// PSTXVP and friends.
+ bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
+ return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
+ }
+
// Select an address into a single register.
bool SelectAddr(SDValue N, SDValue &Base) {
Base = N;
@@ -340,8 +343,6 @@ namespace {
return true;
}
- void InsertVRSaveCode(MachineFunction &MF);
-
StringRef getPassName() const override {
return "PowerPC DAG->DAG Pattern Instruction Selection";
}
@@ -351,6 +352,7 @@ namespace {
private:
bool trySETCC(SDNode *N);
+ bool tryFoldSWTestBRCC(SDNode *N);
bool tryAsSingleRLDICL(SDNode *N);
bool tryAsSingleRLDICR(SDNode *N);
bool tryAsSingleRLWINM(SDNode *N);
@@ -375,70 +377,6 @@ private:
} // end anonymous namespace
-/// InsertVRSaveCode - Once the entire function has been instruction selected,
-/// all virtual registers are created and all machine instructions are built,
-/// check to see if we need to save/restore VRSAVE. If so, do it.
-void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
- // Check to see if this function uses vector registers, which means we have to
- // save and restore the VRSAVE register and update it with the regs we use.
- //
- // In this case, there will be virtual registers of vector type created
- // by the scheduler. Detect them now.
- bool HasVectorVReg = false;
- for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
- if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
- HasVectorVReg = true;
- break;
- }
- }
- if (!HasVectorVReg) return; // nothing to do.
-
- // If we have a vector register, we want to emit code into the entry and exit
- // blocks to save and restore the VRSAVE register. We do this here (instead
- // of marking all vector instructions as clobbering VRSAVE) for two reasons:
- //
- // 1. This (trivially) reduces the load on the register allocator, by not
- // having to represent the live range of the VRSAVE register.
- // 2. This (more significantly) allows us to create a temporary virtual
- // register to hold the saved VRSAVE value, allowing this temporary to be
- // register allocated, instead of forcing it to be spilled to the stack.
-
- // Create two vregs - one to hold the VRSAVE register that is live-in to the
- // function and one for the value after having bits or'd into it.
- Register InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
- Register UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
-
- const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
- MachineBasicBlock &EntryBB = *Fn.begin();
- DebugLoc dl;
- // Emit the following code into the entry block:
- // InVRSAVE = MFVRSAVE
- // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
- // MTVRSAVE UpdatedVRSAVE
- MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point
- BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
- BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
- UpdatedVRSAVE).addReg(InVRSAVE);
- BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
-
- // Find all return blocks, outputting a restore in each epilog.
- for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
- if (BB->isReturnBlock()) {
- IP = BB->end(); --IP;
-
- // Skip over all terminator instructions, which are part of the return
- // sequence.
- MachineBasicBlock::iterator I2 = IP;
- while (I2 != BB->begin() && (--I2)->isTerminator())
- IP = I2;
-
- // Emit: MTVRSAVE InVRSave
- BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
- }
- }
-}
-
/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
@@ -648,6 +586,8 @@ bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
SDValue Offset = ST->getOffset();
if (!Offset.isUndef())
return false;
+ if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
+ return false;
SDLoc dl(ST);
EVT MemVT = ST->getMemoryVT();
@@ -691,6 +631,8 @@ bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
SDValue Offset = LD->getOffset();
if (!Offset.isUndef())
return false;
+ if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
+ return false;
SDLoc dl(LD);
EVT MemVT = LD->getMemoryVT();
@@ -800,251 +742,6 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
return false;
}
-// Predict the number of instructions that would be generated by calling
-// selectI64Imm(N).
-static unsigned selectI64ImmInstrCountDirect(int64_t Imm) {
- // Assume no remaining bits.
- unsigned Remainder = 0;
- // Assume no shift required.
- unsigned Shift = 0;
-
- // If it can't be represented as a 32 bit value.
- if (!isInt<32>(Imm)) {
- Shift = countTrailingZeros<uint64_t>(Imm);
- int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
-
- // If the shifted value fits 32 bits.
- if (isInt<32>(ImmSh)) {
- // Go with the shifted value.
- Imm = ImmSh;
- } else {
- // Still stuck with a 64 bit value.
- Remainder = Imm;
- Shift = 32;
- Imm >>= 32;
- }
- }
-
- // Intermediate operand.
- unsigned Result = 0;
-
- // Handle first 32 bits.
- unsigned Lo = Imm & 0xFFFF;
-
- // Simple value.
- if (isInt<16>(Imm)) {
- // Just the Lo bits.
- ++Result;
- } else if (Lo) {
- // Handle the Hi bits and Lo bits.
- Result += 2;
- } else {
- // Just the Hi bits.
- ++Result;
- }
-
- // If no shift, we're done.
- if (!Shift) return Result;
-
- // If Hi word == Lo word,
- // we can use rldimi to insert the Lo word into Hi word.
- if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
- ++Result;
- return Result;
- }
-
- // Shift for next step if the upper 32-bits were not zero.
- if (Imm)
- ++Result;
-
- // Add in the last bits as required.
- if ((Remainder >> 16) & 0xFFFF)
- ++Result;
- if (Remainder & 0xFFFF)
- ++Result;
-
- return Result;
-}
-
-static uint64_t Rot64(uint64_t Imm, unsigned R) {
- return (Imm << R) | (Imm >> (64 - R));
-}
-
-static unsigned selectI64ImmInstrCount(int64_t Imm) {
- unsigned Count = selectI64ImmInstrCountDirect(Imm);
-
- // If the instruction count is 1 or 2, we do not need further analysis
- // since rotate + load constant requires at least 2 instructions.
- if (Count <= 2)
- return Count;
-
- for (unsigned r = 1; r < 63; ++r) {
- uint64_t RImm = Rot64(Imm, r);
- unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
- Count = std::min(Count, RCount);
-
- // See comments in selectI64Imm for an explanation of the logic below.
- unsigned LS = findLastSet(RImm);
- if (LS != r-1)
- continue;
-
- uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
- uint64_t RImmWithOnes = RImm | OnesMask;
-
- RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
- Count = std::min(Count, RCount);
- }
-
- return Count;
-}
-
-// Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount
-// (above) needs to be kept in sync with this function.
-static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
- int64_t Imm) {
- // Assume no remaining bits.
- unsigned Remainder = 0;
- // Assume no shift required.
- unsigned Shift = 0;
-
- // If it can't be represented as a 32 bit value.
- if (!isInt<32>(Imm)) {
- Shift = countTrailingZeros<uint64_t>(Imm);
- int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
-
- // If the shifted value fits 32 bits.
- if (isInt<32>(ImmSh)) {
- // Go with the shifted value.
- Imm = ImmSh;
- } else {
- // Still stuck with a 64 bit value.
- Remainder = Imm;
- Shift = 32;
- Imm >>= 32;
- }
- }
-
- // Intermediate operand.
- SDNode *Result;
-
- // Handle first 32 bits.
- unsigned Lo = Imm & 0xFFFF;
- unsigned Hi = (Imm >> 16) & 0xFFFF;
-
- auto getI32Imm = [CurDAG, dl](unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
- };
-
- // Simple value.
- if (isInt<16>(Imm)) {
- uint64_t SextImm = SignExtend64(Lo, 16);
- SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
- // Just the Lo bits.
- Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
- } else if (Lo) {
- // Handle the Hi bits.
- unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
- Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
- // And Lo bits.
- Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
- SDValue(Result, 0), getI32Imm(Lo));
- } else {
- // Just the Hi bits.
- Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
- }
-
- // If no shift, we're done.
- if (!Shift) return Result;
-
- // If Hi word == Lo word,
- // we can use rldimi to insert the Lo word into Hi word.
- if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
- SDValue Ops[] =
- { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)};
- return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
- }
-
- // Shift for next step if the upper 32-bits were not zero.
- if (Imm) {
- Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
- SDValue(Result, 0),
- getI32Imm(Shift),
- getI32Imm(63 - Shift));
- }
-
- // Add in the last bits as required.
- if ((Hi = (Remainder >> 16) & 0xFFFF)) {
- Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
- SDValue(Result, 0), getI32Imm(Hi));
- }
- if ((Lo = Remainder & 0xFFFF)) {
- Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
- SDValue(Result, 0), getI32Imm(Lo));
- }
-
- return Result;
-}
-
-static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl,
- int64_t Imm) {
- unsigned Count = selectI64ImmInstrCountDirect(Imm);
-
- // If the instruction count is 1 or 2, we do not need further analysis
- // since rotate + load constant requires at least 2 instructions.
- if (Count <= 2)
- return selectI64ImmDirect(CurDAG, dl, Imm);
-
- unsigned RMin = 0;
-
- int64_t MatImm;
- unsigned MaskEnd;
-
- for (unsigned r = 1; r < 63; ++r) {
- uint64_t RImm = Rot64(Imm, r);
- unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
- if (RCount < Count) {
- Count = RCount;
- RMin = r;
- MatImm = RImm;
- MaskEnd = 63;
- }
-
- // If the immediate to generate has many trailing zeros, it might be
- // worthwhile to generate a rotated value with too many leading ones
- // (because that's free with li/lis's sign-extension semantics), and then
- // mask them off after rotation.
-
- unsigned LS = findLastSet(RImm);
- // We're adding (63-LS) higher-order ones, and we expect to mask them off
- // after performing the inverse rotation by (64-r). So we need that:
- // 63-LS == 64-r => LS == r-1
- if (LS != r-1)
- continue;
-
- uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
- uint64_t RImmWithOnes = RImm | OnesMask;
-
- RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
- if (RCount < Count) {
- Count = RCount;
- RMin = r;
- MatImm = RImmWithOnes;
- MaskEnd = LS;
- }
- }
-
- if (!RMin)
- return selectI64ImmDirect(CurDAG, dl, Imm);
-
- auto getI32Imm = [CurDAG, dl](unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
- };
-
- SDValue Val = SDValue(selectI64ImmDirect(CurDAG, dl, MatImm), 0);
- return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val,
- getI32Imm(64 - RMin), getI32Imm(MaskEnd));
-}
-
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
unsigned MaxTruncation = 0;
// Cannot use range-based for loop here as we need the actual use (i.e. we
@@ -1101,6 +798,274 @@ static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
return MaxTruncation;
}
+// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
+// zeros and return the number of bits by the left of these consecutive zeros.
+static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
+ unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm));
+ unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm));
+ if ((HiTZ + LoLZ) >= Num)
+ return (32 + HiTZ);
+ return 0;
+}
+
+// Direct materialization of 64-bit constants by enumerated patterns.
+static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
+ uint64_t Imm, unsigned &InstCnt) {
+ unsigned TZ = countTrailingZeros<uint64_t>(Imm);
+ unsigned LZ = countLeadingZeros<uint64_t>(Imm);
+ unsigned TO = countTrailingOnes<uint64_t>(Imm);
+ unsigned LO = countLeadingOnes<uint64_t>(Imm);
+ unsigned Hi32 = Hi_32(Imm);
+ unsigned Lo32 = Lo_32(Imm);
+ SDNode *Result = nullptr;
+ unsigned Shift = 0;
+
+ auto getI32Imm = [CurDAG, dl](unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
+ };
+
+ // Following patterns use 1 instructions to materialize the Imm.
+ InstCnt = 1;
+ // 1-1) Patterns : {zeros}{15-bit valve}
+ // {ones}{15-bit valve}
+ if (isInt<16>(Imm)) {
+ SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
+ return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
+ }
+ // 1-2) Patterns : {zeros}{15-bit valve}{16 zeros}
+ // {ones}{15-bit valve}{16 zeros}
+ if (TZ > 15 && (LZ > 32 || LO > 32))
+ return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
+ getI32Imm((Imm >> 16) & 0xffff));
+
+ // Following patterns use 2 instructions to materialize the Imm.
+ InstCnt = 2;
+ assert(LZ < 64 && "Unexpected leading zeros here.");
+ // Count of ones follwing the leading zeros.
+ unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ);
+ // 2-1) Patterns : {zeros}{31-bit value}
+ // {ones}{31-bit value}
+ if (isInt<32>(Imm)) {
+ uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
+ unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
+ return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(Imm & 0xffff));
+ }
+ // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
+ // {zeros}{15-bit value}{zeros}
+ // {zeros}{ones}{15-bit value}
+ // {ones}{15-bit value}{zeros}
+ // We can take advantage of LI's sign-extension semantics to generate leading
+ // ones, and then use RLDIC to mask off the ones in both sides after rotation.
+ if ((LZ + FO + TZ) > 48) {
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
+ getI32Imm((Imm >> TZ) & 0xffff));
+ return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(TZ), getI32Imm(LZ));
+ }
+ // 2-3) Pattern : {zeros}{15-bit value}{ones}
+ // Shift right the Imm by (48 - LZ) bits to construct a negtive 16 bits value,
+ // therefore we can take advantage of LI's sign-extension semantics, and then
+ // mask them off after rotation.
+ //
+ // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+
+ // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
+ // +------------------------+ +------------------------+
+ // 63 0 63 0
+ // Imm (Imm >> (48 - LZ) & 0xffff)
+ // +----sext-----|--16-bit--+ +clear-|-----------------+
+ // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
+ // +------------------------+ +------------------------+
+ // 63 0 63 0
+ // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ
+ if ((LZ + TO) > 48) {
+ // Since the immediates with (LZ > 32) have been handled by previous
+ // patterns, here we have (LZ <= 32) to make sure we will not shift right
+ // the Imm by a negative value.
+ assert(LZ <= 32 && "Unexpected shift value.");
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
+ getI32Imm((Imm >> (48 - LZ) & 0xffff)));
+ return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(48 - LZ), getI32Imm(LZ));
+ }
+ // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
+ // {ones}{15-bit value}{ones}
+ // We can take advantage of LI's sign-extension semantics to generate leading
+ // ones, and then use RLDICL to mask off the ones in left sides (if required)
+ // after rotation.
+ //
+ // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+
+ // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb|
+ // +------------------------+ +------------------------+
+ // 63 0 63 0
+ // Imm (Imm >> TO) & 0xffff
+ // +----sext-----|--16-bit--+ +LZ|---------------------+
+ // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111|
+ // +------------------------+ +------------------------+
+ // 63 0 63 0
+ // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ
+ if ((LZ + FO + TO) > 48) {
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
+ getI32Imm((Imm >> TO) & 0xffff));
+ return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(TO), getI32Imm(LZ));
+ }
+ // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
+ // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
+ // value, we can use LI for Lo16 without generating leading ones then add the
+ // Hi16(in Lo32).
+ if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
+ getI32Imm(Lo32 & 0xffff));
+ return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(Lo32 >> 16));
+ }
+ // 2-6) Patterns : {******}{49 zeros}{******}
+ // {******}{49 ones}{******}
+ // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
+ // bits remain on both sides. Rotate right the Imm to construct an int<16>
+ // value, use LI for int<16> value and then use RLDICL without mask to rotate
+ // it back.
+ //
+ // 1) findContiguousZerosAtLeast(Imm, 49)
+ // +------|--zeros-|------+ +---ones--||---15 bit--+
+ // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb|
+ // +----------------------+ +----------------------+
+ // 63 0 63 0
+ //
+ // 2) findContiguousZerosAtLeast(~Imm, 49)
+ // +------|--ones--|------+ +---ones--||---15 bit--+
+ // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
+ // +----------------------+ +----------------------+
+ // 63 0 63 0
+ if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
+ (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
+ uint64_t RotImm = (Imm >> Shift) | (Imm << (64 - Shift));
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
+ getI32Imm(RotImm & 0xffff));
+ return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(Shift), getI32Imm(0));
+ }
+
+ // Following patterns use 3 instructions to materialize the Imm.
+ InstCnt = 3;
+ // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
+ // {zeros}{31-bit value}{zeros}
+ // {zeros}{ones}{31-bit value}
+ // {ones}{31-bit value}{zeros}
+ // We can take advantage of LIS's sign-extension semantics to generate leading
+ // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
+ // ones in both sides after rotation.
+ if ((LZ + FO + TZ) > 32) {
+ uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
+ unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm((Imm >> TZ) & 0xffff));
+ return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(TZ), getI32Imm(LZ));
+ }
+ // 3-2) Pattern : {zeros}{31-bit value}{ones}
+ // Shift right the Imm by (32 - LZ) bits to construct a negtive 32 bits value,
+ // therefore we can take advantage of LIS's sign-extension semantics, add
+ // the remaining bits with ORI, and then mask them off after rotation.
+ // This is similar to Pattern 2-3, please refer to the diagram there.
+ if ((LZ + TO) > 32) {
+ // Since the immediates with (LZ > 32) have been handled by previous
+ // patterns, here we have (LZ <= 32) to make sure we will not shift right
+ // the Imm by a negative value.
+ assert(LZ <= 32 && "Unexpected shift value.");
+ Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
+ getI32Imm((Imm >> (48 - LZ)) & 0xffff));
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm((Imm >> (32 - LZ)) & 0xffff));
+ return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(32 - LZ), getI32Imm(LZ));
+ }
+ // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
+ // {ones}{31-bit value}{ones}
+ // We can take advantage of LIS's sign-extension semantics to generate leading
+ // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
+ // ones in left sides (if required) after rotation.
+ // This is similar to Pattern 2-4, please refer to the diagram there.
+ if ((LZ + FO + TO) > 32) {
+ Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
+ getI32Imm((Imm >> (TO + 16)) & 0xffff));
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm((Imm >> TO) & 0xffff));
+ return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(TO), getI32Imm(LZ));
+ }
+ // 3-4) Patterns : High word == Low word
+ if (Hi32 == Lo32) {
+ // Handle the first 32 bits.
+ uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
+ unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(Lo32 & 0xffff));
+ // Use rldimi to insert the Low word into High word.
+ SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
+ getI32Imm(0)};
+ return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
+ }
+ // 3-5) Patterns : {******}{33 zeros}{******}
+ // {******}{33 ones}{******}
+ // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
+ // bits remain on both sides. Rotate right the Imm to construct an int<32>
+ // value, use LIS + ORI for int<32> value and then use RLDICL without mask to
+ // rotate it back.
+ // This is similar to Pattern 2-6, please refer to the diagram there.
+ if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
+ (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
+ uint64_t RotImm = (Imm >> Shift) | (Imm << (64 - Shift));
+ uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
+ unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(RotImm & 0xffff));
+ return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(Shift), getI32Imm(0));
+ }
+
+ InstCnt = 0;
+ return nullptr;
+}
+
+static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
+ unsigned *InstCnt = nullptr) {
+ unsigned InstCntDirect = 0;
+ // No more than 3 instructions is used if we can select the i64 immediate
+ // directly.
+ SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);
+ if (Result) {
+ if (InstCnt)
+ *InstCnt = InstCntDirect;
+ return Result;
+ }
+ auto getI32Imm = [CurDAG, dl](unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
+ };
+ // Handle the upper 32 bit value.
+ Result =
+ selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
+ // Add in the last bits as required.
+ if (uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff) {
+ Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Hi16));
+ ++InstCntDirect;
+ }
+ if (uint32_t Lo16 = Lo_32(Imm) & 0xffff) {
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
+ getI32Imm(Lo16));
+ ++InstCntDirect;
+ }
+ if (InstCnt)
+ *InstCnt = InstCntDirect;
+ return Result;
+}
+
// Select a 64-bit constant.
static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
SDLoc dl(N);
@@ -1253,6 +1218,7 @@ class BitPermutationSelector {
}
break;
case ISD::SHL:
+ case PPCISD::SHL:
if (isa<ConstantSDNode>(V.getOperand(1))) {
unsigned ShiftAmt = V.getConstantOperandVal(1);
@@ -1268,6 +1234,7 @@ class BitPermutationSelector {
}
break;
case ISD::SRL:
+ case PPCISD::SRL:
if (isa<ConstantSDNode>(V.getOperand(1))) {
unsigned ShiftAmt = V.getConstantOperandVal(1);
@@ -2147,11 +2114,14 @@ class BitPermutationSelector {
unsigned NumAndInsts = (unsigned) NeedsRotate +
(unsigned) (bool) Res;
+ unsigned NumOfSelectInsts = 0;
+ selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
+ assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
if (Use32BitInsts)
NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
(unsigned) (ANDIMask != 0 && ANDISMask != 0);
else
- NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1;
+ NumAndInsts += NumOfSelectInsts + /* and */ 1;
unsigned NumRLInsts = 0;
bool FirstBG = true;
@@ -2375,12 +2345,14 @@ class BitPermutationSelector {
Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
} else {
- if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1;
-
- SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
- Res =
- SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
- ExtendToInt64(Res, dl), MaskVal), 0);
+ unsigned NumOfSelectInsts = 0;
+ SDValue MaskVal =
+ SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
+ Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
+ ExtendToInt64(Res, dl), MaskVal),
+ 0);
+ if (InstCnt)
+ *InstCnt += NumOfSelectInsts + /* and */ 1;
}
}
@@ -2411,7 +2383,7 @@ class BitPermutationSelector {
}
void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
- BitGroups.erase(remove_if(BitGroups, F), BitGroups.end());
+ erase_if(BitGroups, F);
}
SmallVector<ValueBit, 64> Bits;
@@ -3661,6 +3633,12 @@ bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
return false;
+ // For POWER10, it is more profitable to use the set boolean extension
+ // instructions rather than the integer compare elimination codegen.
+ // Users can override this via the command line option, `--ppc-gpr-icmps`.
+ if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
+ return false;
+
switch (N->getOpcode()) {
default: break;
case ISD::ZERO_EXTEND:
@@ -3708,7 +3686,7 @@ bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
/// SelectCC - Select a comparison of the specified values with the specified
/// condition code, returning the CR# of the expression.
SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- const SDLoc &dl) {
+ const SDLoc &dl, SDValue Chain) {
// Always select the LHS.
unsigned Opc;
@@ -3861,7 +3839,12 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
assert(Subtarget->hasVSX() && "__float128 requires VSX");
Opc = PPC::XSCMPUQP;
}
- return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
+ if (Chain)
+ return SDValue(
+ CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
+ 0);
+ else
+ return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
}
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
@@ -3936,7 +3919,8 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
-// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
+// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
+// and v4f32).
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
bool HasVSX, bool &Swap, bool &Negate) {
Swap = false;
@@ -4017,6 +4001,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPEQUW;
else if (VecVT == MVT::v2i64)
return PPC::VCMPEQUD;
+ else if (VecVT == MVT::v1i128)
+ return PPC::VCMPEQUQ;
break;
case ISD::SETGT:
if (VecVT == MVT::v16i8)
@@ -4027,6 +4013,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPGTSW;
else if (VecVT == MVT::v2i64)
return PPC::VCMPGTSD;
+ else if (VecVT == MVT::v1i128)
+ return PPC::VCMPGTSQ;
break;
case ISD::SETUGT:
if (VecVT == MVT::v16i8)
@@ -4037,6 +4025,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPGTUW;
else if (VecVT == MVT::v2i64)
return PPC::VCMPGTUD;
+ else if (VecVT == MVT::v1i128)
+ return PPC::VCMPGTUQ;
break;
default:
break;
@@ -4048,17 +4038,23 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
SDLoc dl(N);
unsigned Imm;
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ bool IsStrict = N->isStrictFPOpcode();
+ ISD::CondCode CC =
+ cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
EVT PtrVT =
CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
bool isPPC64 = (PtrVT == MVT::i64);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+
+ SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
+ SDValue RHS = N->getOperand(IsStrict ? 2 : 1);
- if (!Subtarget->useCRBits() && isInt32Immediate(N->getOperand(1), Imm)) {
+ if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
// We can codegen setcc op, imm very efficiently compared to a brcond.
// Check for those cases here.
// setcc op, 0
if (Imm == 0) {
- SDValue Op = N->getOperand(0);
+ SDValue Op = LHS;
switch (CC) {
default: break;
case ISD::SETEQ: {
@@ -4093,7 +4089,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
}
}
} else if (Imm == ~0U) { // setcc op, -1
- SDValue Op = N->getOperand(0);
+ SDValue Op = LHS;
switch (CC) {
default: break;
case ISD::SETEQ:
@@ -4136,13 +4132,10 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
}
}
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
-
// Altivec Vector compare instructions do not set any CR register by default and
// vector compare operations return the same type as the operands.
- if (LHS.getValueType().isVector()) {
- if (Subtarget->hasQPX() || Subtarget->hasSPE())
+ if (!IsStrict && LHS.getValueType().isVector()) {
+ if (Subtarget->hasSPE())
return false;
EVT VecVT = LHS.getValueType();
@@ -4169,7 +4162,9 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
bool Inv;
unsigned Idx = getCRIdxForSetCC(CC, Inv);
- SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
+ SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
+ if (IsStrict)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
SDValue IntCR;
// SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
@@ -4272,8 +4267,10 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
(FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
return false;
- bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC;
- SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0);
+ SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
+ ? FalseRes
+ : FalseRes.getOperand(0);
+ bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
if (SetOrSelCC.getOpcode() != ISD::SETCC &&
SetOrSelCC.getOpcode() != ISD::SELECT_CC)
return false;
@@ -4382,6 +4379,81 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
return true;
}
+// Return true if it's a software square-root/divide operand.
+static bool isSWTestOp(SDValue N) {
+ if (N.getOpcode() == PPCISD::FTSQRT)
+ return true;
+ if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)))
+ return false;
+ switch (N.getConstantOperandVal(0)) {
+ case Intrinsic::ppc_vsx_xvtdivdp:
+ case Intrinsic::ppc_vsx_xvtdivsp:
+ case Intrinsic::ppc_vsx_xvtsqrtdp:
+ case Intrinsic::ppc_vsx_xvtsqrtsp:
+ return true;
+ }
+ return false;
+}
+
+bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
+ assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
+ // We are looking for following patterns, where `truncate to i1` actually has
+ // the same semantic with `and 1`.
+ // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
+ // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
+ // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
+ // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
+ // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
+ // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
+ // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
+ // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ if (CC != ISD::SETEQ && CC != ISD::SETNE)
+ return false;
+
+ SDValue CmpRHS = N->getOperand(3);
+ if (!isa<ConstantSDNode>(CmpRHS) ||
+ cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0)
+ return false;
+
+ SDValue CmpLHS = N->getOperand(2);
+ if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
+ return false;
+
+ unsigned PCC = 0;
+ bool IsCCNE = CC == ISD::SETNE;
+ if (CmpLHS.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(CmpLHS.getOperand(1)))
+ switch (CmpLHS.getConstantOperandVal(1)) {
+ case 1:
+ PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
+ break;
+ case 2:
+ PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
+ break;
+ case 4:
+ PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
+ break;
+ case 8:
+ PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
+ break;
+ default:
+ return false;
+ }
+ else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
+ CmpLHS.getValueType() == MVT::i1)
+ PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
+
+ if (PCC) {
+ SDLoc dl(N);
+ SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
+ N->getOperand(0)};
+ CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
+ return true;
+ }
+ return false;
+}
+
bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
unsigned Imm;
@@ -4661,7 +4733,48 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
break;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ if (!Subtarget->isISA3_1())
+ break;
+ unsigned Opcode = 0;
+ switch (N->getConstantOperandVal(0)) {
+ default:
+ break;
+ case Intrinsic::ppc_altivec_vstribr_p:
+ Opcode = PPC::VSTRIBR_rec;
+ break;
+ case Intrinsic::ppc_altivec_vstribl_p:
+ Opcode = PPC::VSTRIBL_rec;
+ break;
+ case Intrinsic::ppc_altivec_vstrihr_p:
+ Opcode = PPC::VSTRIHR_rec;
+ break;
+ case Intrinsic::ppc_altivec_vstrihl_p:
+ Opcode = PPC::VSTRIHL_rec;
+ break;
+ }
+ if (!Opcode)
+ break;
+
+ // Generate the appropriate vector string isolate intrinsic to match.
+ EVT VTs[] = {MVT::v16i8, MVT::Glue};
+ SDValue VecStrOp =
+ SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);
+ // Vector string isolate instructions update the EQ bit of CR6.
+ // Generate a SETBC instruction to extract the bit and place it in a GPR.
+ SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);
+ SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
+ SDValue CRBit = SDValue(
+ CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
+ CR6Reg, SubRegIdx, VecStrOp.getValue(1)),
+ 0);
+ CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);
+ return;
+ }
+
case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
if (trySETCC(N))
return;
break;
@@ -4813,8 +4926,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
- case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
- case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
case MVT::f64: Opcode = PPC::LFDUX; break;
case MVT::f32: Opcode = PPC::LFSUX; break;
case MVT::i32: Opcode = PPC::LWZUX; break;
@@ -4961,6 +5072,32 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// Other cases are autogenerated.
break;
}
+ case ISD::MUL: {
+ SDValue Op1 = N->getOperand(1);
+ if (Op1.getOpcode() != ISD::Constant || Op1.getValueType() != MVT::i64)
+ break;
+
+ // If the multiplier fits int16, we can handle it with mulli.
+ int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue();
+ unsigned Shift = countTrailingZeros<uint64_t>(Imm);
+ if (isInt<16>(Imm) || !Shift)
+ break;
+
+ // If the shifted value fits int16, we can do this transformation:
+ // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to
+ // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
+ uint64_t ImmSh = Imm >> Shift;
+ if (isInt<16>(ImmSh)) {
+ uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
+ SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
+ SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
+ N->getOperand(0), SDImm);
+ CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, SDValue(MulNode, 0),
+ getI32Imm(Shift, dl), getI32Imm(63 - Shift, dl));
+ return;
+ }
+ break;
+ }
// FIXME: Remove this once the ANDI glue bug is fixed:
case PPCISD::ANDI_rec_1_EQ_BIT:
case PPCISD::ANDI_rec_1_GT_BIT: {
@@ -5095,12 +5232,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SelectCCOp = PPC::SELECT_CC_F16;
else if (Subtarget->hasSPE())
SelectCCOp = PPC::SELECT_CC_SPE;
- else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
- SelectCCOp = PPC::SELECT_CC_QFRC;
- else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
- SelectCCOp = PPC::SELECT_CC_QSRC;
- else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
- SelectCCOp = PPC::SELECT_CC_QBRC;
else if (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)
SelectCCOp = PPC::SELECT_CC_VSRC;
@@ -5192,6 +5323,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
case ISD::BR_CC: {
+ if (tryFoldSWTestBRCC(N))
+ return;
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
unsigned PCC =
getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
@@ -5856,9 +5989,6 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
- case PPC::SELECT_QFRC:
- case PPC::SELECT_QSRC:
- case PPC::SELECT_QBRC:
case PPC::SELECT_SPE:
case PPC::SELECT_SPE4:
case PPC::SELECT_VRRC:
@@ -6177,9 +6307,6 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
- case PPC::SELECT_QFRC:
- case PPC::SELECT_QSRC:
- case PPC::SELECT_QBRC:
case PPC::SELECT_SPE:
case PPC::SELECT_SPE4:
case PPC::SELECT_VRRC:
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index ddfbd04e1ebc..9215c17cb94b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -74,6 +74,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
@@ -120,6 +121,11 @@ cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
cl::desc("use absolute jump tables on ppc"), cl::Hidden);
+// TODO - Remove this option if soft fp128 has been fully supported .
+static cl::opt<bool>
+ EnableSoftFP128("enable-soft-fp128",
+ cl::desc("temp option to enable soft fp128"), cl::Hidden);
+
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
@@ -145,7 +151,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (!useSoftFloat()) {
if (hasSPE()) {
addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
- addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
+ // EFPU2 APU only supports f32
+ if (!Subtarget.hasEFPU2())
+ addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
} else {
addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
@@ -215,13 +223,36 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (isPPC64 || Subtarget.hasFPCVT()) {
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
+ AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
+ AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
+
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
+ AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
+ AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
+ AddPromotedToType(ISD::FP_TO_SINT, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
+ AddPromotedToType(ISD::FP_TO_UINT, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
} else {
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
}
@@ -247,6 +278,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// PPC (the libcall is not available).
setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
// We do not currently implement these libm ops for PowerPC.
setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
@@ -299,8 +332,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
- if (Subtarget.hasVSX())
- setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f64, Legal);
+ if (Subtarget.hasVSX()) {
+ setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
+ }
if (Subtarget.hasFSQRT()) {
setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
@@ -338,6 +373,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FMA , MVT::f32, Legal);
}
+ if (Subtarget.hasSPE())
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
@@ -415,6 +453,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (!Subtarget.useCRBits())
setOperationAction(ISD::SETCC, MVT::i32, Custom);
+ if (Subtarget.hasFPU()) {
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
+
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
+ }
+
// PowerPC does not have BRCOND which requires SetCC
if (!Subtarget.useCRBits())
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
@@ -431,9 +479,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
} else {
// PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
// PowerPC does not have [U|S]INT_TO_FP
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
}
@@ -561,36 +612,56 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
+
if (Subtarget.has64BitSupport()) {
// They also have instructions for converting between i64 and fp.
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
// This is just the low 32 bits of a (signed) fp->i64 conversion.
// We cannot do this with Promote because i64 is not a legal type.
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
+ if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
+ }
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
if (Subtarget.hasSPE()) {
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
- } else
+ } else {
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ }
}
// With the instructions enabled under FPCVT, we can do everything.
if (Subtarget.hasFPCVT()) {
if (Subtarget.has64BitSupport()) {
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
}
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
@@ -613,6 +684,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
+ // PowerPC has better expansions for funnel shifts than the generic
+ // TargetLowering::expandFunnelShift.
+ if (Subtarget.has64BitSupport()) {
+ setOperationAction(ISD::FSHL, MVT::i64, Custom);
+ setOperationAction(ISD::FSHR, MVT::i64, Custom);
+ }
+ setOperationAction(ISD::FSHL, MVT::i32, Custom);
+ setOperationAction(ISD::FSHR, MVT::i32, Custom);
+
if (Subtarget.hasVSX()) {
setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
@@ -745,9 +825,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
}
- for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
- setOperationAction(ISD::ABS, VT, Custom);
-
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
// with merges, splats, etc.
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
@@ -767,6 +844,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::v4i32,
Subtarget.useCRBits() ? Legal : Expand);
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
@@ -776,11 +857,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
- // Without hasP8Altivec set, v2i64 SMAX isn't available.
- // But ABS custom lowering requires SMAX support.
- if (!Subtarget.hasP8Altivec())
- setOperationAction(ISD::ABS, MVT::v2i64, Expand);
-
// Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
// With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
@@ -799,7 +875,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
- if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
+ if (Subtarget.hasVSX()) {
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
@@ -809,6 +885,27 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
else
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ if (Subtarget.isISA3_1()) {
+ setOperationAction(ISD::MUL, MVT::v2i64, Legal);
+ setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
+ setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
+ setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
+ setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
+ setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
+ setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
+ setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
+ setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
+ setOperationAction(ISD::UREM, MVT::v2i64, Legal);
+ setOperationAction(ISD::SREM, MVT::v2i64, Legal);
+ setOperationAction(ISD::UREM, MVT::v4i32, Legal);
+ setOperationAction(ISD::SREM, MVT::v4i32, Legal);
+ setOperationAction(ISD::UREM, MVT::v1i128, Legal);
+ setOperationAction(ISD::SREM, MVT::v1i128, Legal);
+ setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
+ setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
+ setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
+ }
+
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -920,6 +1017,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SUB, MVT::v2i64, Expand);
}
+ if (Subtarget.isISA3_1())
+ setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
+ else
+ setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
+
setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
setOperationAction(ISD::STORE, MVT::v2i64, Promote);
@@ -927,6 +1029,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
@@ -935,6 +1041,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// Custom handling for partial vectors of integers converted to
// floating point. We already have optimal handling for v2i32 through
// the DAG combine, so those aren't necessary.
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
@@ -966,7 +1080,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
- setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
@@ -980,7 +1094,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
- setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
@@ -1063,6 +1177,48 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
+ } else if (Subtarget.hasAltivec() && EnableSoftFP128) {
+ addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
+
+ for (MVT FPT : MVT::fp_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
+
+ setOperationAction(ISD::LOAD, MVT::f128, Promote);
+ setOperationAction(ISD::STORE, MVT::f128, Promote);
+
+ AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
+ AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
+
+ // Set FADD/FSUB as libcall to avoid the legalizer to expand the
+ // fp_to_uint and int_to_fp.
+ setOperationAction(ISD::FADD, MVT::f128, LibCall);
+ setOperationAction(ISD::FSUB, MVT::f128, LibCall);
+
+ setOperationAction(ISD::FMUL, MVT::f128, Expand);
+ setOperationAction(ISD::FDIV, MVT::f128, Expand);
+ setOperationAction(ISD::FNEG, MVT::f128, Expand);
+ setOperationAction(ISD::FABS, MVT::f128, Expand);
+ setOperationAction(ISD::FSIN, MVT::f128, Expand);
+ setOperationAction(ISD::FCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FPOW, MVT::f128, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f128, Expand);
+ setOperationAction(ISD::FREM, MVT::f128, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f128, Expand);
+ setOperationAction(ISD::FMA, MVT::f128, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
+
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+
+ // Expand the fp_extend if the target type is fp128.
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand);
+
+ // Expand the fp_round if the source type is fp128.
+ for (MVT VT : {MVT::f32, MVT::f64}) {
+ setOperationAction(ISD::FP_ROUND, VT, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
+ }
}
if (Subtarget.hasP9Altivec()) {
@@ -1079,164 +1235,24 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}
}
- if (Subtarget.hasQPX()) {
- setOperationAction(ISD::FADD, MVT::v4f64, Legal);
- setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
- setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
- setOperationAction(ISD::FREM, MVT::v4f64, Expand);
-
- setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
- setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
-
- setOperationAction(ISD::LOAD , MVT::v4f64, Custom);
- setOperationAction(ISD::STORE , MVT::v4f64, Custom);
-
- setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
-
- if (!Subtarget.useCRBits())
- setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
- setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
-
- setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
- setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
- setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
- setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
-
- setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
- setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
-
- setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
- setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
-
- setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
- setOperationAction(ISD::FABS , MVT::v4f64, Legal);
- setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
- setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
- setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
- setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
- setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
- setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
- setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
- setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
-
- setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
-
- setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
- setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
-
- addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
-
- setOperationAction(ISD::FADD, MVT::v4f32, Legal);
- setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
- setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
- setOperationAction(ISD::FREM, MVT::v4f32, Expand);
-
- setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
- setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
-
- setOperationAction(ISD::LOAD , MVT::v4f32, Custom);
- setOperationAction(ISD::STORE , MVT::v4f32, Custom);
-
- if (!Subtarget.useCRBits())
- setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
- setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
-
- setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
- setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
- setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
- setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
-
- setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
- setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
-
- setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
- setOperationAction(ISD::FABS , MVT::v4f32, Legal);
- setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
- setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
- setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
- setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
- setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
- setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
- setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
- setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
-
- setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
-
- setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
- setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
-
- addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
-
- setOperationAction(ISD::AND , MVT::v4i1, Legal);
- setOperationAction(ISD::OR , MVT::v4i1, Legal);
- setOperationAction(ISD::XOR , MVT::v4i1, Legal);
-
- if (!Subtarget.useCRBits())
- setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
- setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
-
- setOperationAction(ISD::LOAD , MVT::v4i1, Custom);
- setOperationAction(ISD::STORE , MVT::v4i1, Custom);
-
- setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
- setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
- setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
-
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
-
- addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
-
- setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
- setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
- setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
- setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
-
- setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
- setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
- setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
-
- setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
-
- // These need to set FE_INEXACT, and so cannot be vectorized here.
- setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
- setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
-
- if (TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
- setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
-
- setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
- setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
- } else {
- setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
- setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
-
- setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
- setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
- }
-
- // TODO: Handle constrained floating-point operations of v4f64
+ if (Subtarget.pairedVectorMemops()) {
+ addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
+ setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
+ setOperationAction(ISD::STORE, MVT::v256i1, Custom);
+ }
+ if (Subtarget.hasMMA()) {
+ addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
+ setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
+ setOperationAction(ISD::STORE, MVT::v512i1, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
}
if (Subtarget.has64BitSupport())
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
+ if (Subtarget.isISA3_1())
+ setOperationAction(ISD::SRA, MVT::v1i128, Legal);
+
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
if (!isPPC64) {
@@ -1258,6 +1274,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLibcallName(RTLIB::SRA_I128, nullptr);
}
+ if (!isPPC64)
+ setMaxAtomicSizeInBitsSupported(32);
+
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
@@ -1295,12 +1314,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::SELECT_CC);
}
- // Use reciprocal estimates.
- if (TM.Options.UnsafeFPMath) {
- setTargetDAGCombine(ISD::FDIV);
- setTargetDAGCombine(ISD::FSQRT);
- }
-
if (Subtarget.hasP9Altivec()) {
setTargetDAGCombine(ISD::ABS);
setTargetDAGCombine(ISD::VSELECT);
@@ -1316,8 +1329,19 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLibcallName(RTLIB::POW_F128, "powf128");
setLibcallName(RTLIB::FMIN_F128, "fminf128");
setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
- setLibcallName(RTLIB::POWI_F128, "__powikf2");
setLibcallName(RTLIB::REM_F128, "fmodf128");
+ setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
+ setLibcallName(RTLIB::CEIL_F128, "ceilf128");
+ setLibcallName(RTLIB::FLOOR_F128, "floorf128");
+ setLibcallName(RTLIB::TRUNC_F128, "truncf128");
+ setLibcallName(RTLIB::ROUND_F128, "roundf128");
+ setLibcallName(RTLIB::LROUND_F128, "lroundf128");
+ setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
+ setLibcallName(RTLIB::RINT_F128, "rintf128");
+ setLibcallName(RTLIB::LRINT_F128, "lrintf128");
+ setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
+ setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
+ setLibcallName(RTLIB::FMA_F128, "fmaf128");
// With 32 condition bits, we don't need to sink (and duplicate) compares
// aggressively in CodeGenPrep.
@@ -1380,6 +1404,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
MaxLoadsPerMemcmpOptSize = 4;
}
+ IsStrictFPEnabled = true;
+
// Let the subtarget (CPU) decide if a predictable select is more expensive
// than the corresponding branch. This information is used in CGP to decide
// when to convert selects into branches.
@@ -1422,8 +1448,8 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
// 16byte and wider vectors are passed on 16byte boundary.
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
- if (Subtarget.hasAltivec() || Subtarget.hasQPX())
- getMaxByValAlign(Ty, Alignment, Subtarget.hasQPX() ? Align(32) : Align(16));
+ if (Subtarget.hasAltivec())
+ getMaxByValAlign(Ty, Alignment, Align(16));
return Alignment.value();
}
@@ -1439,16 +1465,6 @@ bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
return VT.isScalarInteger();
}
-/// isMulhCheaperThanMulShift - Return true if a mulh[s|u] node for a specific
-/// type is cheaper than a multiply followed by a shift.
-/// This is true for words and doublewords on 64-bit PowerPC.
-bool PPCTargetLowering::isMulhCheaperThanMulShift(EVT Type) const {
- if (Subtarget.isPPC64() && (isOperationLegal(ISD::MULHS, Type) ||
- isOperationLegal(ISD::MULHU, Type)))
- return true;
- return TargetLowering::isMulhCheaperThanMulShift(Type);
-}
-
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
@@ -1469,6 +1485,10 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "PPCISD::FP_TO_SINT_IN_VSR";
case PPCISD::FRE: return "PPCISD::FRE";
case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
+ case PPCISD::FTSQRT:
+ return "PPCISD::FTSQRT";
+ case PPCISD::FSQRT:
+ return "PPCISD::FSQRT";
case PPCISD::STFIWX: return "PPCISD::STFIWX";
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
@@ -1516,7 +1536,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ANDI_rec_1_GT_BIT:
return "PPCISD::ANDI_rec_1_GT_BIT";
case PPCISD::VCMP: return "PPCISD::VCMP";
- case PPCISD::VCMPo: return "PPCISD::VCMPo";
+ case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
case PPCISD::LBRX: return "PPCISD::LBRX";
case PPCISD::STBRX: return "PPCISD::STBRX";
case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
@@ -1553,6 +1573,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
+ case PPCISD::PADDI_DTPREL:
+ return "PPCISD::PADDI_DTPREL";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
case PPCISD::SC: return "PPCISD::SC";
case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
@@ -1561,12 +1583,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
case PPCISD::VABSD: return "PPCISD::VABSD";
- case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
- case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
- case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
- case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
- case PPCISD::QBFLT: return "PPCISD::QBFLT";
- case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
@@ -1574,8 +1590,35 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
+ case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
+ return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
+ case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
+ return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
+ case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
+ case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
+ case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
+ case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
+ case PPCISD::STRICT_FADDRTZ:
+ return "PPCISD::STRICT_FADDRTZ";
+ case PPCISD::STRICT_FCTIDZ:
+ return "PPCISD::STRICT_FCTIDZ";
+ case PPCISD::STRICT_FCTIWZ:
+ return "PPCISD::STRICT_FCTIWZ";
+ case PPCISD::STRICT_FCTIDUZ:
+ return "PPCISD::STRICT_FCTIDUZ";
+ case PPCISD::STRICT_FCTIWUZ:
+ return "PPCISD::STRICT_FCTIWUZ";
+ case PPCISD::STRICT_FCFID:
+ return "PPCISD::STRICT_FCFID";
+ case PPCISD::STRICT_FCFIDU:
+ return "PPCISD::STRICT_FCFIDU";
+ case PPCISD::STRICT_FCFIDS:
+ return "PPCISD::STRICT_FCFIDS";
+ case PPCISD::STRICT_FCFIDUS:
+ return "PPCISD::STRICT_FCFIDUS";
+ case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
}
return nullptr;
}
@@ -1585,9 +1628,6 @@ EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
if (!VT.isVector())
return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
- if (Subtarget.hasQPX())
- return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
-
return VT.changeVectorElementTypeToInteger();
}
@@ -2361,36 +2401,6 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
return SDValue();
}
-/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
-/// amount, otherwise return -1.
-int PPC::isQVALIGNIShuffleMask(SDNode *N) {
- EVT VT = N->getValueType(0);
- if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
- return -1;
-
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
-
- // Find the first non-undef value in the shuffle mask.
- unsigned i;
- for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
- /*search*/;
-
- if (i == 4) return -1; // all undef.
-
- // Otherwise, check to see if the rest of the elements are consecutively
- // numbered from this value.
- unsigned ShiftAmt = SVOp->getMaskElt(i);
- if (ShiftAmt < i) return -1;
- ShiftAmt -= i;
-
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != 4; ++i)
- if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
- return -1;
-
- return ShiftAmt;
-}
-
//===----------------------------------------------------------------------===//
// Addressing Mode Selection
//===----------------------------------------------------------------------===//
@@ -2432,6 +2442,20 @@ bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
return false;
}
+/// isIntS34Immediate - This method tests if value of node given can be
+/// accurately represented as a sign extension from a 34-bit value. If so,
+/// this returns true and the immediate.
+bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
+ if (!isa<ConstantSDNode>(N))
+ return false;
+
+ Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+ return isInt<34>(Imm);
+}
+bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
+ return isIntS34Immediate(Op.getNode(), Imm);
+}
+
/// SelectAddressRegReg - Given the specified addressed, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
@@ -2632,6 +2656,55 @@ bool PPCTargetLowering::SelectAddressRegImm(
return true; // [r+0]
}
+/// Similar to the 16-bit case but for instructions that take a 34-bit
+/// displacement field (prefixed loads/stores).
+bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
+ SDValue &Base,
+ SelectionDAG &DAG) const {
+ // Only on 64-bit targets.
+ if (N.getValueType() != MVT::i64)
+ return false;
+
+ SDLoc dl(N);
+ int64_t Imm = 0;
+
+ if (N.getOpcode() == ISD::ADD) {
+ if (!isIntS34Immediate(N.getOperand(1), Imm))
+ return false;
+ Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N.getOperand(0);
+ return true;
+ }
+
+ if (N.getOpcode() == ISD::OR) {
+ if (!isIntS34Immediate(N.getOperand(1), Imm))
+ return false;
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
+ if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
+ return false;
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N.getOperand(0);
+ Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
+ return true;
+ }
+
+ if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
+ Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
+ Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
+ return true;
+ }
+
+ return false;
+}
+
/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
@@ -2761,16 +2834,9 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
return false;
}
- // PowerPC doesn't have preinc load/store instructions for vectors (except
- // for QPX, which does have preinc r+r forms).
- if (VT.isVector()) {
- if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
- return false;
- } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
- AM = ISD::PRE_INC;
- return true;
- }
- }
+ // PowerPC doesn't have preinc load/store instructions for vectors
+ if (VT.isVector())
+ return false;
if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
// Common code will reject creating a pre-inc form if the base pointer
@@ -3065,6 +3131,15 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
TLSModel::Model Model = TM.getTLSModel(GV);
if (Model == TLSModel::LocalExec) {
+ if (Subtarget.isUsingPCRelativeCalls()) {
+ SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
+ SDValue TGA = DAG.getTargetGlobalAddress(
+ GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
+ SDValue MatAddr =
+ DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
+ }
+
SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TPREL_HA);
SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
@@ -3077,29 +3152,44 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
}
if (Model == TLSModel::InitialExec) {
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
- SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TLS);
- SDValue GOTPtr;
- if (is64bit) {
- setUsesTOCBasePtr(DAG);
- SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
- GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
- PtrVT, GOTReg, TGA);
+ bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
+ SDValue TGA = DAG.getTargetGlobalAddress(
+ GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
+ SDValue TGATLS = DAG.getTargetGlobalAddress(
+ GV, dl, PtrVT, 0,
+ IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
+ SDValue TPOffset;
+ if (IsPCRel) {
+ SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
+ TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
+ MachinePointerInfo());
} else {
- if (!TM.isPositionIndependent())
- GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
- else if (picLevel == PICLevel::SmallPIC)
- GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
- else
- GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ SDValue GOTPtr;
+ if (is64bit) {
+ setUsesTOCBasePtr(DAG);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr =
+ DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
+ } else {
+ if (!TM.isPositionIndependent())
+ GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
+ else if (picLevel == PICLevel::SmallPIC)
+ GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
+ else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ }
+ TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
}
- SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
- PtrVT, TGA, GOTPtr);
return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
}
if (Model == TLSModel::GeneralDynamic) {
+ if (Subtarget.isUsingPCRelativeCalls()) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_GOT_TLSGD_PCREL_FLAG);
+ return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
+ }
+
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {
@@ -3118,6 +3208,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
}
if (Model == TLSModel::LocalDynamic) {
+ if (Subtarget.isUsingPCRelativeCalls()) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_GOT_TLSLD_PCREL_FLAG);
+ SDValue MatPCRel =
+ DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
+ return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
+ }
+
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {
@@ -3492,11 +3590,6 @@ static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
PPC::F11, PPC::F12, PPC::F13};
-/// QFPR - The set of QPX registers that should be allocated for arguments.
-static const MCPhysReg QFPR[] = {
- PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
- PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
-
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
@@ -3526,10 +3619,6 @@ static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
Alignment = Align(16);
- // QPX vector types stored in double-precision are padded to a 32 byte
- // boundary.
- else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
- Alignment = Align(32);
// ByVal parameters are aligned as requested.
if (Flags.isByVal()) {
@@ -3561,14 +3650,11 @@ static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
/// stack slot (instead of being passed in registers). ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
-static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
- ISD::ArgFlagsTy Flags,
- unsigned PtrByteSize,
- unsigned LinkageSize,
- unsigned ParamAreaSize,
- unsigned &ArgOffset,
+static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize, unsigned LinkageSize,
+ unsigned ParamAreaSize, unsigned &ArgOffset,
unsigned &AvailableFPRs,
- unsigned &AvailableVRs, bool HasQPX) {
+ unsigned &AvailableVRs) {
bool UseMemory = false;
// Respect alignment of argument on the stack.
@@ -3592,11 +3678,7 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
// However, if the argument is actually passed in an FPR or a VR,
// we don't use memory after all.
if (!Flags.isByVal()) {
- if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
- // QPX registers overlap with the scalar FP registers.
- (HasQPX && (ArgVT == MVT::v4f32 ||
- ArgVT == MVT::v4f64 ||
- ArgVT == MVT::v4i1)))
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
if (AvailableFPRs > 0) {
--AvailableFPRs;
return false;
@@ -3631,11 +3713,8 @@ SDValue PPCTargetLowering::LowerFormalArguments(
if (Subtarget.is64BitELFABI())
return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
InVals);
- if (Subtarget.is32BitELFABI())
- return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
- InVals);
-
- return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
+ assert(Subtarget.is32BitELFABI());
+ return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
InVals);
}
@@ -3735,18 +3814,12 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
RC = &PPC::VRRCRegClass;
break;
case MVT::v4f32:
- RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
+ RC = &PPC::VRRCRegClass;
break;
case MVT::v2f64:
case MVT::v2i64:
RC = &PPC::VRRCRegClass;
break;
- case MVT::v4f64:
- RC = &PPC::QFRCRegClass;
- break;
- case MVT::v4i1:
- RC = &PPC::QBRCRegClass;
- break;
}
SDValue ArgValue;
@@ -3945,7 +4018,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
const unsigned Num_GPR_Regs = array_lengthof(GPR);
const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
- const unsigned Num_QFPR_Regs = Num_FPR_Regs;
// Do a first pass over the arguments to determine whether the ABI
// guarantees that our caller has allocated the parameter save area
@@ -3964,8 +4036,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
PtrByteSize, LinkageSize, ParamAreaSize,
- NumBytes, AvailableFPRs, AvailableVRs,
- Subtarget.hasQPX()))
+ NumBytes, AvailableFPRs, AvailableVRs))
HasParameterArea = true;
}
@@ -3975,7 +4046,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
unsigned ArgOffset = LinkageSize;
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- unsigned &QFPR_idx = FPR_idx;
SmallVector<SDValue, 8> MemOps;
Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
unsigned CurArgIdx = 0;
@@ -4218,51 +4288,20 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::v2i64:
case MVT::v1i128:
case MVT::f128:
- if (!Subtarget.hasQPX()) {
- // These can be scalar arguments or elements of a vector array type
- // passed directly. The latter are used to implement ELFv2 homogenous
- // vector aggregates.
- if (VR_idx != Num_VR_Regs) {
- unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
- ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- ++VR_idx;
- } else {
- if (CallConv == CallingConv::Fast)
- ComputeArgOffset();
- needsLoad = true;
- }
- if (CallConv != CallingConv::Fast || needsLoad)
- ArgOffset += 16;
- break;
- } // not QPX
-
- assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
- "Invalid QPX parameter type");
- LLVM_FALLTHROUGH;
-
- case MVT::v4f64:
- case MVT::v4i1:
- // QPX vectors are treated like their scalar floating-point subregisters
- // (except that they're larger).
- unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
- if (QFPR_idx != Num_QFPR_Regs) {
- const TargetRegisterClass *RC;
- switch (ObjectVT.getSimpleVT().SimpleTy) {
- case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
- case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
- default: RC = &PPC::QBRCRegClass; break;
- }
-
- unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
+ // These can be scalar arguments or elements of a vector array type
+ // passed directly. The latter are used to implement ELFv2 homogenous
+ // vector aggregates.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- ++QFPR_idx;
+ ++VR_idx;
} else {
if (CallConv == CallingConv::Fast)
ComputeArgOffset();
needsLoad = true;
}
if (CallConv != CallingConv::Fast || needsLoad)
- ArgOffset += Sz;
+ ArgOffset += 16;
break;
}
@@ -4329,366 +4368,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
return Chain;
}
-SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
- SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
- // TODO: add description of PPC stack frame format, or at least some docs.
- //
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo &MFI = MF.getFrameInfo();
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
-
- EVT PtrVT = getPointerTy(MF.getDataLayout());
- bool isPPC64 = PtrVT == MVT::i64;
- // Potential tail calls could cause overwriting of argument stack slots.
- bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
- (CallConv == CallingConv::Fast));
- unsigned PtrByteSize = isPPC64 ? 8 : 4;
- unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
- unsigned ArgOffset = LinkageSize;
- // Area that is at least reserved in caller of this function.
- unsigned MinReservedArea = ArgOffset;
-
- static const MCPhysReg GPR_32[] = { // 32-bit registers.
- PPC::R3, PPC::R4, PPC::R5, PPC::R6,
- PPC::R7, PPC::R8, PPC::R9, PPC::R10,
- };
- static const MCPhysReg GPR_64[] = { // 64-bit registers.
- PPC::X3, PPC::X4, PPC::X5, PPC::X6,
- PPC::X7, PPC::X8, PPC::X9, PPC::X10,
- };
- static const MCPhysReg VR[] = {
- PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
- PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
- };
-
- const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
- const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
- const unsigned Num_VR_Regs = array_lengthof( VR);
-
- unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
-
- const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
-
- // In 32-bit non-varargs functions, the stack space for vectors is after the
- // stack space for non-vectors. We do not use this space unless we have
- // too many vectors to fit in registers, something that only occurs in
- // constructed examples:), but we have to walk the arglist to figure
- // that out...for the pathological case, compute VecArgOffset as the
- // start of the vector parameter area. Computing VecArgOffset is the
- // entire point of the following loop.
- unsigned VecArgOffset = ArgOffset;
- if (!isVarArg && !isPPC64) {
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
- ++ArgNo) {
- EVT ObjectVT = Ins[ArgNo].VT;
- ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
-
- if (Flags.isByVal()) {
- // ObjSize is the true size, ArgSize rounded up to multiple of regs.
- unsigned ObjSize = Flags.getByValSize();
- unsigned ArgSize =
- ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
- VecArgOffset += ArgSize;
- continue;
- }
-
- switch(ObjectVT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unhandled argument type!");
- case MVT::i1:
- case MVT::i32:
- case MVT::f32:
- VecArgOffset += 4;
- break;
- case MVT::i64: // PPC64
- case MVT::f64:
- // FIXME: We are guaranteed to be !isPPC64 at this point.
- // Does MVT::i64 apply?
- VecArgOffset += 8;
- break;
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- // Nothing to do, we're only looking at Nonvector args here.
- break;
- }
- }
- }
- // We've found where the vector parameter area in memory is. Skip the
- // first 12 parameters; these don't use that memory.
- VecArgOffset = ((VecArgOffset+15)/16)*16;
- VecArgOffset += 12*16;
-
- // Add DAG nodes to load the arguments or copy them out of registers. On
- // entry to a function on PPC, the arguments start after the linkage area,
- // although the first ones are often in registers.
-
- SmallVector<SDValue, 8> MemOps;
- unsigned nAltivecParamsAtEnd = 0;
- Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
- unsigned CurArgIdx = 0;
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
- SDValue ArgVal;
- bool needsLoad = false;
- EVT ObjectVT = Ins[ArgNo].VT;
- unsigned ObjSize = ObjectVT.getSizeInBits()/8;
- unsigned ArgSize = ObjSize;
- ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
- if (Ins[ArgNo].isOrigArg()) {
- std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
- CurArgIdx = Ins[ArgNo].getOrigArgIndex();
- }
- unsigned CurArgOffset = ArgOffset;
-
- // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
- if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
- ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
- if (isVarArg || isPPC64) {
- MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += CalculateStackSlotSize(ObjectVT,
- Flags,
- PtrByteSize);
- } else nAltivecParamsAtEnd++;
- } else
- // Calculate min reserved area.
- MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
- Flags,
- PtrByteSize);
-
- // FIXME the codegen can be much improved in some cases.
- // We do not have to keep everything in memory.
- if (Flags.isByVal()) {
- assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
-
- // ObjSize is the true size, ArgSize rounded up to multiple of registers.
- ObjSize = Flags.getByValSize();
- ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
- // Objects of size 1 and 2 are right justified, everything else is
- // left justified. This means the memory address is adjusted forwards.
- if (ObjSize==1 || ObjSize==2) {
- CurArgOffset = CurArgOffset + (4 - ObjSize);
- }
- // The value of the object is its address.
- int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- InVals.push_back(FIN);
- if (ObjSize==1 || ObjSize==2) {
- if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg;
- if (isPPC64)
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
- else
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
- SDValue Store =
- DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(&*FuncArg), ObjType);
- MemOps.push_back(Store);
- ++GPR_idx;
- }
-
- ArgOffset += PtrByteSize;
-
- continue;
- }
- for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
- // Store whatever pieces of the object are in registers
- // to memory. ArgOffset will be the address of the beginning
- // of the object.
- if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg;
- if (isPPC64)
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
- else
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
- int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(&*FuncArg, j));
- MemOps.push_back(Store);
- ++GPR_idx;
- ArgOffset += PtrByteSize;
- } else {
- ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
- break;
- }
- }
- continue;
- }
-
- switch (ObjectVT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unhandled argument type!");
- case MVT::i1:
- case MVT::i32:
- if (!isPPC64) {
- if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
- ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
-
- if (ObjectVT == MVT::i1)
- ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
-
- ++GPR_idx;
- } else {
- needsLoad = true;
- ArgSize = PtrByteSize;
- }
- // All int arguments reserve stack space in the Darwin ABI.
- ArgOffset += PtrByteSize;
- break;
- }
- LLVM_FALLTHROUGH;
- case MVT::i64: // PPC64
- if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
- ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
-
- if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
- // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
- // value to MVT::i64 and then truncate to the correct register size.
- ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
-
- ++GPR_idx;
- } else {
- needsLoad = true;
- ArgSize = PtrByteSize;
- }
- // All int arguments reserve stack space in the Darwin ABI.
- ArgOffset += 8;
- break;
-
- case MVT::f32:
- case MVT::f64:
- // Every 4 bytes of argument space consumes one of the GPRs available for
- // argument passing.
- if (GPR_idx != Num_GPR_Regs) {
- ++GPR_idx;
- if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
- ++GPR_idx;
- }
- if (FPR_idx != Num_FPR_Regs) {
- unsigned VReg;
-
- if (ObjectVT == MVT::f32)
- VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
- else
- VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
-
- ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- ++FPR_idx;
- } else {
- needsLoad = true;
- }
-
- // All FP arguments reserve stack space in the Darwin ABI.
- ArgOffset += isPPC64 ? 8 : ObjSize;
- break;
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- // Note that vector arguments in registers don't reserve stack space,
- // except in varargs functions.
- if (VR_idx != Num_VR_Regs) {
- unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
- ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- if (isVarArg) {
- while ((ArgOffset % 16) != 0) {
- ArgOffset += PtrByteSize;
- if (GPR_idx != Num_GPR_Regs)
- GPR_idx++;
- }
- ArgOffset += 16;
- GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
- }
- ++VR_idx;
- } else {
- if (!isVarArg && !isPPC64) {
- // Vectors go after all the nonvectors.
- CurArgOffset = VecArgOffset;
- VecArgOffset += 16;
- } else {
- // Vectors are aligned.
- ArgOffset = ((ArgOffset+15)/16)*16;
- CurArgOffset = ArgOffset;
- ArgOffset += 16;
- }
- needsLoad = true;
- }
- break;
- }
-
- // We need to load the argument to a virtual register if we determined above
- // that we ran out of physical registers of the appropriate type.
- if (needsLoad) {
- int FI = MFI.CreateFixedObject(ObjSize,
- CurArgOffset + (ArgSize - ObjSize),
- isImmutable);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
- }
-
- InVals.push_back(ArgVal);
- }
-
- // Allow for Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += 16*nAltivecParamsAtEnd;
- }
-
- // Area that is at least reserved in the caller of this function.
- MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
-
- // Set the size that is at least reserved in caller of this function. Tail
- // call optimized functions' reserved stack space needs to be aligned so that
- // taking the difference between two stack areas will result in an aligned
- // stack.
- MinReservedArea =
- EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
- FuncInfo->setMinReservedArea(MinReservedArea);
-
- // If the function takes variable number of arguments, make a frame index for
- // the start of the first vararg value... for expansion of llvm.va_start.
- if (isVarArg) {
- int Depth = ArgOffset;
-
- FuncInfo->setVarArgsFrameIndex(
- MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
- Depth, true));
- SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
-
- // If this function is vararg, store any remaining integer argument regs
- // to their spots on the stack so that they may be loaded by dereferencing
- // the result of va_next.
- for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
- unsigned VReg;
-
- if (isPPC64)
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
- else
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
-
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
- MemOps.push_back(Store);
- // Increment the address by four for the next argument to store
- SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
- FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
- }
- }
-
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
-
- return Chain;
-}
-
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
@@ -4759,6 +4438,13 @@ static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
if (STICallee->isUsingPCRelativeCalls())
return false;
+ // If the GV is not a strong definition then we need to assume it can be
+ // replaced by another function at link time. The function that replaces
+ // it may not share the same TOC as the caller since the callee may be
+ // replaced by a PC Relative version of the same function.
+ if (!GV->isStrongDefinitionForLinker())
+ return false;
+
// The medium and large code models are expected to provide a sufficiently
// large TOC to provide all data addressing needs of a module with a
// single TOC.
@@ -4766,12 +4452,6 @@ static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
CodeModel::Large == TM.getCodeModel())
return true;
- // Otherwise we need to ensure callee and caller are in the same section,
- // since the linker may allocate multiple TOCs, and we don't know which
- // sections will belong to the same TOC base.
- if (!GV->isStrongDefinitionForLinker())
- return false;
-
// Any explicitly-specified sections and section prefixes must also match.
// Also, if we're using -ffunction-sections, then each function is always in
// a different section (the same is true for COMDAT functions).
@@ -4815,10 +4495,9 @@ needStackSlotPassParameters(const PPCSubtarget &Subtarget,
for (const ISD::OutputArg& Param : Outs) {
if (Param.Flags.isNest()) continue;
- if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
- PtrByteSize, LinkageSize, ParamAreaSize,
- NumBytes, AvailableFPRs, AvailableVRs,
- Subtarget.hasQPX()))
+ if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
+ LinkageSize, ParamAreaSize, NumBytes,
+ AvailableFPRs, AvailableVRs))
return true;
}
return false;
@@ -5332,66 +5011,53 @@ static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
Subtarget.is32BitELFABI() && !isLocalCallee() &&
Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
- // On AIX, direct function calls reference the symbol for the function's
- // entry point, which is named by prepending a "." before the function's
- // C-linkage name.
- const auto getAIXFuncEntryPointSymbolSDNode =
- [&](StringRef FuncName, bool IsDeclaration,
- const XCOFF::StorageClass &SC) {
- auto &Context = DAG.getMachineFunction().getMMI().getContext();
-
- MCSymbolXCOFF *S = cast<MCSymbolXCOFF>(
- Context.getOrCreateSymbol(Twine(".") + Twine(FuncName)));
-
- if (IsDeclaration && !S->hasRepresentedCsectSet()) {
- // On AIX, an undefined symbol needs to be associated with a
- // MCSectionXCOFF to get the correct storage mapping class.
- // In this case, XCOFF::XMC_PR.
- MCSectionXCOFF *Sec = Context.getXCOFFSection(
- S->getSymbolTableName(), XCOFF::XMC_PR, XCOFF::XTY_ER, SC,
- SectionKind::getMetadata());
- S->setRepresentedCsect(Sec);
- }
+ const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
+ const TargetMachine &TM = Subtarget.getTargetMachine();
+ const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
+ MCSymbolXCOFF *S =
+ cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
- MVT PtrVT =
- DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
- return DAG.getMCSymbol(S, PtrVT);
- };
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ return DAG.getMCSymbol(S, PtrVT);
+ };
if (isFunctionGlobalAddress(Callee)) {
- const GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
- const GlobalValue *GV = G->getGlobal();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
- if (!Subtarget.isAIXABI())
- return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
- UsePlt ? PPCII::MO_PLT : 0);
-
- assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
- const GlobalObject *GO = cast<GlobalObject>(GV);
- const XCOFF::StorageClass SC =
- TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
- return getAIXFuncEntryPointSymbolSDNode(GO->getName(), GO->isDeclaration(),
- SC);
+ if (Subtarget.isAIXABI()) {
+ assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
+ return getAIXFuncEntryPointSymbolSDNode(GV);
+ }
+ return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
+ UsePlt ? PPCII::MO_PLT : 0);
}
if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *SymName = S->getSymbol();
- if (!Subtarget.isAIXABI())
- return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
- UsePlt ? PPCII::MO_PLT : 0);
+ if (Subtarget.isAIXABI()) {
+ // If there exists a user-declared function whose name is the same as the
+ // ExternalSymbol's, then we pick up the user-declared version.
+ const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
+ if (const Function *F =
+ dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
+ return getAIXFuncEntryPointSymbolSDNode(F);
+
+ // On AIX, direct function calls reference the symbol for the function's
+ // entry point, which is named by prepending a "." before the function's
+ // C-linkage name. A Qualname is returned here because an external
+ // function entry point is a csect with XTY_ER property.
+ const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
+ auto &Context = DAG.getMachineFunction().getMMI().getContext();
+ MCSectionXCOFF *Sec = Context.getXCOFFSection(
+ (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
+ SectionKind::getMetadata());
+ return Sec->getQualNameSymbol();
+ };
- // If there exists a user-declared function whose name is the same as the
- // ExternalSymbol's, then we pick up the user-declared version.
- const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
- if (const Function *F =
- dyn_cast_or_null<Function>(Mod->getNamedValue(SymName))) {
- const XCOFF::StorageClass SC =
- TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(F);
- return getAIXFuncEntryPointSymbolSDNode(F->getName(), F->isDeclaration(),
- SC);
+ SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
}
-
- return getAIXFuncEntryPointSymbolSDNode(SymName, true, XCOFF::C_EXT);
+ return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
+ UsePlt ? PPCII::MO_PLT : 0);
}
// No transformation needed.
@@ -5736,19 +5402,15 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
CLI.NoMerge);
- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
- return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
- InVals, CB);
-
- if (Subtarget.isSVR4ABI())
- return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
- InVals, CB);
-
if (Subtarget.isAIXABI())
return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
InVals, CB);
- return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
+ assert(Subtarget.isSVR4ABI());
+ if (Subtarget.isPPC64())
+ return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
+ InVals, CB);
+ return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
InVals, CB);
}
@@ -6045,7 +5707,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
unsigned NumBytes = LinkageSize;
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- unsigned &QFPR_idx = FPR_idx;
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
@@ -6059,7 +5720,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
const unsigned NumGPRs = array_lengthof(GPR);
const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
const unsigned NumVRs = array_lengthof(VR);
- const unsigned NumQFPRs = NumFPRs;
// On ELFv2, we can avoid allocating the parameter area if all the arguments
// can be passed to the callee in registers.
@@ -6074,9 +5734,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
for (unsigned i = 0; i != NumOps; ++i) {
if (Outs[i].Flags.isNest()) continue;
if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
- PtrByteSize, LinkageSize, ParamAreaSize,
- NumBytesTmp, AvailableFPRs, AvailableVRs,
- Subtarget.hasQPX()))
+ PtrByteSize, LinkageSize, ParamAreaSize,
+ NumBytesTmp, AvailableFPRs, AvailableVRs))
HasParameterArea = true;
}
}
@@ -6124,20 +5783,11 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
continue;
break;
case MVT::v4f32:
- // When using QPX, this is handled like a FP register, otherwise, it
- // is an Altivec register.
- if (Subtarget.hasQPX()) {
- if (++NumFPRsUsed <= NumFPRs)
- continue;
- } else {
- if (++NumVRsUsed <= NumVRs)
- continue;
- }
+ if (++NumVRsUsed <= NumVRs)
+ continue;
break;
case MVT::f32:
case MVT::f64:
- case MVT::v4f64: // QPX
- case MVT::v4i1: // QPX
if (++NumFPRsUsed <= NumFPRs)
continue;
break;
@@ -6499,7 +6149,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
case MVT::v2i64:
case MVT::v1i128:
case MVT::f128:
- if (!Subtarget.hasQPX()) {
// These can be scalar arguments or elements of a vector array type
// passed directly. The latter are used to implement ELFv2 homogenous
// vector aggregates.
@@ -6555,63 +6204,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (!IsFastCall)
ArgOffset += 16;
break;
- } // not QPX
-
- assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
- "Invalid QPX parameter type");
-
- LLVM_FALLTHROUGH;
- case MVT::v4f64:
- case MVT::v4i1: {
- bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
- if (CFlags.IsVarArg) {
- assert(HasParameterArea &&
- "Parameter area must exist if we have a varargs call.");
- // We could elide this store in the case where the object fits
- // entirely in R registers. Maybe later.
- SDValue Store =
- DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
- MemOpChains.push_back(Store);
- if (QFPR_idx != NumQFPRs) {
- SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
- PtrOff, MachinePointerInfo());
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
- }
- ArgOffset += (IsF32 ? 16 : 32);
- for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
- if (GPR_idx == NumGPRs)
- break;
- SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
- DAG.getConstant(i, dl, PtrVT));
- SDValue Load =
- DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
- }
- break;
- }
-
- // Non-varargs QPX params go into registers or on the stack.
- if (QFPR_idx != NumQFPRs) {
- RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
- } else {
- if (IsFastCall)
- ComputePtrOff();
-
- assert(HasParameterArea &&
- "Parameter area must exist to pass an argument in memory.");
- LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- true, CFlags.IsTailCall, true, MemOpChains,
- TailCallArguments, dl);
- if (IsFastCall)
- ArgOffset += (IsF32 ? 16 : 32);
- }
-
- if (!IsFastCall)
- ArgOffset += (IsF32 ? 16 : 32);
- break;
- }
}
}
@@ -6665,384 +6257,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
-SDValue PPCTargetLowering::LowerCall_Darwin(
- SDValue Chain, SDValue Callee, CallFlags CFlags,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
- const CallBase *CB) const {
- unsigned NumOps = Outs.size();
-
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- bool isPPC64 = PtrVT == MVT::i64;
- unsigned PtrByteSize = isPPC64 ? 8 : 4;
-
- MachineFunction &MF = DAG.getMachineFunction();
-
- // Mark this function as potentially containing a function that contains a
- // tail call. As a consequence the frame pointer will be used for dynamicalloc
- // and restoring the callers stack pointer in this functions epilog. This is
- // done because by tail calling the called function might overwrite the value
- // in this function's (MF) stack pointer stack slot 0(SP).
- if (getTargetMachine().Options.GuaranteedTailCallOpt &&
- CFlags.CallConv == CallingConv::Fast)
- MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
-
- // Count how many bytes are to be pushed on the stack, including the linkage
- // area, and parameter passing area. We start with 24/48 bytes, which is
- // prereserved space for [SP][CR][LR][3 x unused].
- unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
- unsigned NumBytes = LinkageSize;
-
- // Add up all the space actually used.
- // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
- // they all go in registers, but we must reserve stack space for them for
- // possible use by the caller. In varargs or 64-bit calls, parameters are
- // assigned stack space in order, with padding so Altivec parameters are
- // 16-byte aligned.
- unsigned nAltivecParamsAtEnd = 0;
- for (unsigned i = 0; i != NumOps; ++i) {
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
- EVT ArgVT = Outs[i].VT;
- // Varargs Altivec parameters are padded to a 16 byte boundary.
- if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
- ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
- ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
- if (!CFlags.IsVarArg && !isPPC64) {
- // Non-varargs Altivec parameters go after all the non-Altivec
- // parameters; handle those later so we know how much padding we need.
- nAltivecParamsAtEnd++;
- continue;
- }
- // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
- NumBytes = ((NumBytes+15)/16)*16;
- }
- NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
- }
-
- // Allow for Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- NumBytes = ((NumBytes+15)/16)*16;
- NumBytes += 16*nAltivecParamsAtEnd;
- }
-
- // The prolog code of the callee may store up to 8 GPR argument registers to
- // the stack, allowing va_start to index over them in memory if its varargs.
- // Because we cannot tell if this is needed on the caller side, we have to
- // conservatively assume that it is needed. As such, make sure we have at
- // least enough stack space for the caller to store the 8 GPRs.
- NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
-
- // Tail call needs the stack to be aligned.
- if (getTargetMachine().Options.GuaranteedTailCallOpt &&
- CFlags.CallConv == CallingConv::Fast)
- NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
-
- // Calculate by how many bytes the stack has to be adjusted in case of tail
- // call optimization.
- int SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
-
- // To protect arguments on the stack from being clobbered in a tail call,
- // force all the loads to happen before doing any other lowering.
- if (CFlags.IsTailCall)
- Chain = DAG.getStackArgumentTokenFactor(Chain);
-
- // Adjust the stack pointer for the new arguments...
- // These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
- SDValue CallSeqStart = Chain;
-
- // Load the return address and frame pointer so it can be move somewhere else
- // later.
- SDValue LROp, FPOp;
- Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
-
- // Set up a copy of the stack pointer for use loading and storing any
- // arguments that may not fit in the registers available for argument
- // passing.
- SDValue StackPtr;
- if (isPPC64)
- StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
- else
- StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
-
- // Figure out which arguments are going to go in registers, and which in
- // memory. Also, if this is a vararg function, floating point operations
- // must be stored to our stack, and loaded into integer regs as well, if
- // any integer regs are available for argument passing.
- unsigned ArgOffset = LinkageSize;
- unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
-
- static const MCPhysReg GPR_32[] = { // 32-bit registers.
- PPC::R3, PPC::R4, PPC::R5, PPC::R6,
- PPC::R7, PPC::R8, PPC::R9, PPC::R10,
- };
- static const MCPhysReg GPR_64[] = { // 64-bit registers.
- PPC::X3, PPC::X4, PPC::X5, PPC::X6,
- PPC::X7, PPC::X8, PPC::X9, PPC::X10,
- };
- static const MCPhysReg VR[] = {
- PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
- PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
- };
- const unsigned NumGPRs = array_lengthof(GPR_32);
- const unsigned NumFPRs = 13;
- const unsigned NumVRs = array_lengthof(VR);
-
- const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
-
- SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
- SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
-
- SmallVector<SDValue, 8> MemOpChains;
- for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = OutVals[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
-
- // PtrOff will be used to store the current argument to the stack if a
- // register cannot be found for it.
- SDValue PtrOff;
-
- PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
-
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
-
- // On PPC64, promote integers to 64-bit values.
- if (isPPC64 && Arg.getValueType() == MVT::i32) {
- // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
- unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
- }
-
- // FIXME memcpy is used way more than necessary. Correctness first.
- // Note: "by value" is code for passing a structure by value, not
- // basic types.
- if (Flags.isByVal()) {
- unsigned Size = Flags.getByValSize();
- // Very small objects are passed right-justified. Everything else is
- // passed left-justified.
- if (Size==1 || Size==2) {
- EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
- if (GPR_idx != NumGPRs) {
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
- MachinePointerInfo(), VT);
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
-
- ArgOffset += PtrByteSize;
- } else {
- SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
- PtrOff.getValueType());
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
- Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
- CallSeqStart,
- Flags, DAG, dl);
- ArgOffset += PtrByteSize;
- }
- continue;
- }
- // Copy entire object into memory. There are cases where gcc-generated
- // code assumes it is there, even if it could be put entirely into
- // registers. (This is not what the doc says.)
- Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
- CallSeqStart,
- Flags, DAG, dl);
-
- // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
- // copy the pieces of the object that fit into registers from the
- // parameter save area.
- for (unsigned j=0; j<Size; j+=PtrByteSize) {
- SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
- SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
- if (GPR_idx != NumGPRs) {
- SDValue Load =
- DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
- ArgOffset += PtrByteSize;
- } else {
- ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
- break;
- }
- }
- continue;
- }
-
- switch (Arg.getSimpleValueType().SimpleTy) {
- default: llvm_unreachable("Unexpected ValueType for argument!");
- case MVT::i1:
- case MVT::i32:
- case MVT::i64:
- if (GPR_idx != NumGPRs) {
- if (Arg.getValueType() == MVT::i1)
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
-
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
- } else {
- LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, CFlags.IsTailCall, false, MemOpChains,
- TailCallArguments, dl);
- }
- ArgOffset += PtrByteSize;
- break;
- case MVT::f32:
- case MVT::f64:
- if (FPR_idx != NumFPRs) {
- RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
-
- if (CFlags.IsVarArg) {
- SDValue Store =
- DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
- MemOpChains.push_back(Store);
-
- // Float varargs are always shadowed in available integer registers
- if (GPR_idx != NumGPRs) {
- SDValue Load =
- DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
- }
- if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
- SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
- SDValue Load =
- DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
- }
- } else {
- // If we have any FPRs remaining, we may also have GPRs remaining.
- // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
- // GPRs.
- if (GPR_idx != NumGPRs)
- ++GPR_idx;
- if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
- !isPPC64) // PPC64 has 64-bit GPR's obviously :)
- ++GPR_idx;
- }
- } else
- LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, CFlags.IsTailCall, false, MemOpChains,
- TailCallArguments, dl);
- if (isPPC64)
- ArgOffset += 8;
- else
- ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
- break;
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- if (CFlags.IsVarArg) {
- // These go aligned on the stack, or in the corresponding R registers
- // when within range. The Darwin PPC ABI doc claims they also go in
- // V registers; in fact gcc does this only for arguments that are
- // prototyped, not for those that match the ... We do it for all
- // arguments, seems to work.
- while (ArgOffset % 16 !=0) {
- ArgOffset += PtrByteSize;
- if (GPR_idx != NumGPRs)
- GPR_idx++;
- }
- // We could elide this store in the case where the object fits
- // entirely in R registers. Maybe later.
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
- DAG.getConstant(ArgOffset, dl, PtrVT));
- SDValue Store =
- DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
- MemOpChains.push_back(Store);
- if (VR_idx != NumVRs) {
- SDValue Load =
- DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
- }
- ArgOffset += 16;
- for (unsigned i=0; i<16; i+=PtrByteSize) {
- if (GPR_idx == NumGPRs)
- break;
- SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
- DAG.getConstant(i, dl, PtrVT));
- SDValue Load =
- DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
- }
- break;
- }
-
- // Non-varargs Altivec params generally go in registers, but have
- // stack space allocated at the end.
- if (VR_idx != NumVRs) {
- // Doesn't have GPR space allocated.
- RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
- } else if (nAltivecParamsAtEnd==0) {
- // We are emitting Altivec params in order.
- LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, CFlags.IsTailCall, true, MemOpChains,
- TailCallArguments, dl);
- ArgOffset += 16;
- }
- break;
- }
- }
- // If all Altivec parameters fit in registers, as they usually do,
- // they get stack space following the non-Altivec parameters. We
- // don't track this here because nobody below needs it.
- // If there are more Altivec parameters than fit in registers emit
- // the stores here.
- if (!CFlags.IsVarArg && nAltivecParamsAtEnd > NumVRs) {
- unsigned j = 0;
- // Offset is aligned; skip 1st 12 params which go in V registers.
- ArgOffset = ((ArgOffset+15)/16)*16;
- ArgOffset += 12*16;
- for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = OutVals[i];
- EVT ArgType = Outs[i].VT;
- if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
- ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
- if (++j > NumVRs) {
- SDValue PtrOff;
- // We are emitting Altivec params in order.
- LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, CFlags.IsTailCall, true, MemOpChains,
- TailCallArguments, dl);
- ArgOffset += 16;
- }
- }
- }
- }
-
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
-
- // On Darwin, R12 must contain the address of an indirect callee. This does
- // not mean the MTCTR instruction must use R12; it's easier to model this as
- // an extra parameter, so do that.
- if (CFlags.IsIndirect) {
- assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
- RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
- PPC::R12), Callee));
- }
-
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into the appropriate regs.
- SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- if (CFlags.IsTailCall)
- PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
- TailCallArguments);
-
- return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
- Callee, SPDiff, NumBytes, Ins, InVals, CB);
-}
-
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
CCState &State) {
@@ -7053,9 +6267,10 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
- assert((!ValVT.isInteger() ||
- (ValVT.getSizeInBits() <= RegVT.getSizeInBits())) &&
- "Integer argument exceeds register size: should have been legalized");
+ if (ValVT.isVector() && !State.getMachineFunction()
+ .getTarget()
+ .Options.EnableAIXExtendedAltivecABI)
+ report_fatal_error("the default Altivec AIX ABI is not yet supported");
if (ValVT == MVT::f128)
report_fatal_error("f128 is unimplemented on AIX.");
@@ -7063,9 +6278,6 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
if (ArgFlags.isNest())
report_fatal_error("Nest arguments are unimplemented.");
- if (ValVT.isVector() || LocVT.isVector())
- report_fatal_error("Vector arguments are unimplemented on AIX.");
-
static const MCPhysReg GPR_32[] = {// 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10};
@@ -7073,6 +6285,11 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10};
+ static const MCPhysReg VR[] = {// Vector registers.
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5,
+ PPC::V6, PPC::V7, PPC::V8, PPC::V9,
+ PPC::V10, PPC::V11, PPC::V12, PPC::V13};
+
if (ArgFlags.isByVal()) {
if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
report_fatal_error("Pass-by-value arguments with alignment greater than "
@@ -7117,7 +6334,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
case MVT::i32: {
const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
// AIX integer arguments are always passed in register width.
- if (ValVT.getSizeInBits() < RegVT.getSizeInBits())
+ if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
: CCValAssign::LocInfo::ZExt;
if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
@@ -7168,6 +6385,25 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
return false;
}
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ case MVT::v2i64:
+ case MVT::v2f64:
+ case MVT::v1i128: {
+ if (State.isVarArg())
+ report_fatal_error(
+ "variadic arguments for vector types are unimplemented for AIX");
+
+ if (unsigned VReg = State.AllocateReg(VR))
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
+ else {
+ report_fatal_error(
+ "passing vector parameters to the stack is unimplemented for AIX");
+ }
+ return false;
+ }
}
return true;
}
@@ -7188,6 +6424,14 @@ static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
return &PPC::F4RCRegClass;
case MVT::f64:
return &PPC::F8RCRegClass;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ case MVT::v2i64:
+ case MVT::v2f64:
+ case MVT::v1i128:
+ return &PPC::VRRCRegClass;
}
}
@@ -7195,7 +6439,7 @@ static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
SelectionDAG &DAG, SDValue ArgValue,
MVT LocVT, const SDLoc &dl) {
assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
- assert(ValVT.getSizeInBits() < LocVT.getSizeInBits());
+ assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
if (Flags.isSExt())
ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
@@ -7282,8 +6526,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
const PPCSubtarget &Subtarget =
static_cast<const PPCSubtarget &>(DAG.getSubtarget());
- if (Subtarget.hasQPX())
- report_fatal_error("QPX support is not supported on AIX.");
const bool IsPPC64 = Subtarget.isPPC64();
const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
@@ -7292,6 +6534,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
SmallVector<CCValAssign, 16> ArgLocs;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
const EVT PtrVT = getPointerTy(MF.getDataLayout());
@@ -7306,6 +6549,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
CCValAssign &VA = ArgLocs[I++];
MVT LocVT = VA.getLocVT();
ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
+ if (VA.isMemLoc() && VA.getValVT().isVector())
+ report_fatal_error(
+ "passing vector parameters to the stack is unimplemented for AIX");
// For compatibility with the AIX XL compiler, the float args in the
// parameter save area are initialized even if the argument is available
@@ -7316,6 +6562,15 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
if (VA.isMemLoc() && VA.needsCustom())
continue;
+ if (VA.isRegLoc()) {
+ if (VA.getValVT().isScalarInteger())
+ FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
+ else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector())
+ FuncInfo->appendParameterType(VA.getValVT().SimpleTy == MVT::f32
+ ? PPCFunctionInfo::ShortFloatPoint
+ : PPCFunctionInfo::LongFloatPoint);
+ }
+
if (Flags.isByVal() && VA.isMemLoc()) {
const unsigned Size =
alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
@@ -7361,10 +6616,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
// to extracting the value from the register directly, and elide the
// stores when the arguments address is not taken, but that will need to
// be future work.
- SDValue Store =
- DAG.getStore(CopyFrom.getValue(1), dl, CopyFrom,
- DAG.getObjectPtrOffset(dl, FIN, Offset),
- MachinePointerInfo::getFixedStack(MF, FI, Offset));
+ SDValue Store = DAG.getStore(
+ CopyFrom.getValue(1), dl, CopyFrom,
+ DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
+ MachinePointerInfo::getFixedStack(MF, FI, Offset));
MemOps.push_back(Store);
};
@@ -7379,6 +6634,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
const CCValAssign RL = ArgLocs[I++];
HandleRegLoc(RL.getLocReg(), Offset);
+ FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
}
if (Offset != StackSize) {
@@ -7400,7 +6656,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
if (ValVT.isScalarInteger() &&
- (ValVT.getSizeInBits() < LocVT.getSizeInBits())) {
+ (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
ArgValue =
truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
}
@@ -7441,7 +6697,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
// aligned stack.
CallerReservedArea =
EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setMinReservedArea(CallerReservedArea);
if (isVarArg) {
@@ -7503,10 +6758,6 @@ SDValue PPCTargetLowering::LowerCall_AIX(
const PPCSubtarget& Subtarget =
static_cast<const PPCSubtarget&>(DAG.getSubtarget());
- if (Subtarget.hasQPX())
- report_fatal_error("QPX is not supported on AIX.");
- if (Subtarget.hasAltivec())
- report_fatal_error("Altivec support is unimplemented on AIX.");
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<CCValAssign, 16> ArgLocs;
@@ -7563,11 +6814,12 @@ SDValue PPCTargetLowering::LowerCall_AIX(
}
auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
- return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
- (LoadOffset != 0)
- ? DAG.getObjectPtrOffset(dl, Arg, LoadOffset)
- : Arg,
- MachinePointerInfo(), VT);
+ return DAG.getExtLoad(
+ ISD::ZEXTLOAD, dl, PtrVT, Chain,
+ (LoadOffset != 0)
+ ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
+ : Arg,
+ MachinePointerInfo(), VT);
};
unsigned LoadOffset = 0;
@@ -7597,9 +6849,11 @@ SDValue PPCTargetLowering::LowerCall_AIX(
// Only memcpy the bytes that don't pass in register.
MemcpyFlags.setByValSize(ByValSize - LoadOffset);
Chain = CallSeqStart = createMemcpyOutsideCallSeq(
- (LoadOffset != 0) ? DAG.getObjectPtrOffset(dl, Arg, LoadOffset)
- : Arg,
- DAG.getObjectPtrOffset(dl, StackPtr, ByValVA.getLocMemOffset()),
+ (LoadOffset != 0)
+ ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
+ : Arg,
+ DAG.getObjectPtrOffset(dl, StackPtr,
+ TypeSize::Fixed(ByValVA.getLocMemOffset())),
CallSeqStart, MemcpyFlags, DAG, dl);
continue;
}
@@ -7649,6 +6903,10 @@ SDValue PPCTargetLowering::LowerCall_AIX(
const MVT LocVT = VA.getLocVT();
const MVT ValVT = VA.getValVT();
+ if (VA.isMemLoc() && VA.getValVT().isVector())
+ report_fatal_error(
+ "passing vector parameters to the stack is unimplemented for AIX");
+
switch (VA.getLocInfo()) {
default:
report_fatal_error("Unexpected argument extension type.");
@@ -7690,7 +6948,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
// f32 in 32-bit GPR
// f64 in 64-bit GPR
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
- else if (Arg.getValueType().getSizeInBits() < LocVT.getSizeInBits())
+ else if (Arg.getValueType().getFixedSizeInBits() <
+ LocVT.getFixedSizeInBits())
// f32 in 64-bit GPR.
RegsToPass.push_back(std::make_pair(
VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
@@ -8049,20 +7308,45 @@ SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
// <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
// <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
- assert(Op.getValueType().isVector() && "Vector type expected.");
-
- SDLoc DL(Op);
- SDValue N1 = Op.getOperand(0);
- unsigned SrcSize = N1.getValueType().getSizeInBits();
- assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
- SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
-
EVT TrgVT = Op.getValueType();
+ assert(TrgVT.isVector() && "Vector type expected.");
unsigned TrgNumElts = TrgVT.getVectorNumElements();
EVT EltVT = TrgVT.getVectorElementType();
+ if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
+ TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
+ !isPowerOf2_32(EltVT.getSizeInBits()))
+ return SDValue();
+
+ SDValue N1 = Op.getOperand(0);
+ EVT SrcVT = N1.getValueType();
+ unsigned SrcSize = SrcVT.getSizeInBits();
+ if (SrcSize > 256 ||
+ !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
+ !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
+ return SDValue();
+ if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
+ return SDValue();
+
unsigned WideNumElts = 128 / EltVT.getSizeInBits();
EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
+ SDLoc DL(Op);
+ SDValue Op1, Op2;
+ if (SrcSize == 256) {
+ EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
+ EVT SplitVT =
+ N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
+ unsigned SplitNumElts = SplitVT.getVectorNumElements();
+ Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
+ DAG.getConstant(0, DL, VecIdxTy));
+ Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
+ DAG.getConstant(SplitNumElts, DL, VecIdxTy));
+ }
+ else {
+ Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
+ Op2 = DAG.getUNDEF(WideVT);
+ }
+
// First list the elements we want to keep.
unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
SmallVector<int, 16> ShuffV;
@@ -8078,16 +7362,17 @@ SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
// ShuffV.push_back(i + WideNumElts);
ShuffV.push_back(WideNumElts + 1);
- SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
- return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
+ Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
+ Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
+ return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
}
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
- // Not FP? Not a fsel.
+ // Not FP, or using SPE? Not a fsel.
if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
- !Op.getOperand(2).getValueType().isFloatingPoint())
+ !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())
return Op;
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
@@ -8203,54 +7488,105 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
-void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
- SelectionDAG &DAG,
- const SDLoc &dl) const {
- assert(Op.getOperand(0).getValueType().isFloatingPoint());
- SDValue Src = Op.getOperand(0);
- if (Src.getValueType() == MVT::f32)
- Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
-
- SDValue Tmp;
+static unsigned getPPCStrictOpcode(unsigned Opc) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("No strict version of this opcode!");
+ case PPCISD::FCTIDZ:
+ return PPCISD::STRICT_FCTIDZ;
+ case PPCISD::FCTIWZ:
+ return PPCISD::STRICT_FCTIWZ;
+ case PPCISD::FCTIDUZ:
+ return PPCISD::STRICT_FCTIDUZ;
+ case PPCISD::FCTIWUZ:
+ return PPCISD::STRICT_FCTIWUZ;
+ case PPCISD::FCFID:
+ return PPCISD::STRICT_FCFID;
+ case PPCISD::FCFIDU:
+ return PPCISD::STRICT_FCFIDU;
+ case PPCISD::FCFIDS:
+ return PPCISD::STRICT_FCFIDS;
+ case PPCISD::FCFIDUS:
+ return PPCISD::STRICT_FCFIDUS;
+ }
+}
+
+static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
+ SDLoc dl(Op);
+ bool IsStrict = Op->isStrictFPOpcode();
+ bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
+ Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
+
+ // TODO: Any other flags to propagate?
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
+
+ // For strict nodes, source is the second operand.
+ SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
+ assert(Src.getValueType().isFloatingPoint());
+ if (Src.getValueType() == MVT::f32) {
+ if (IsStrict) {
+ Src =
+ DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
+ DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
+ Chain = Src.getValue(1);
+ } else
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+ }
+ SDValue Conv;
+ unsigned Opc = ISD::DELETED_NODE;
switch (Op.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
- Tmp = DAG.getNode(
- Op.getOpcode() == ISD::FP_TO_SINT
- ? PPCISD::FCTIWZ
- : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
- dl, MVT::f64, Src);
+ Opc = IsSigned ? PPCISD::FCTIWZ
+ : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
break;
case MVT::i64:
- assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
+ assert((IsSigned || Subtarget.hasFPCVT()) &&
"i64 FP_TO_UINT is supported only with FPCVT");
- Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
- PPCISD::FCTIDUZ,
- dl, MVT::f64, Src);
- break;
+ Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
}
+ if (IsStrict) {
+ Opc = getPPCStrictOpcode(Opc);
+ Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
+ {Chain, Src}, Flags);
+ } else {
+ Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
+ }
+ return Conv;
+}
+
+void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
+ SelectionDAG &DAG,
+ const SDLoc &dl) const {
+ SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
+ bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
+ Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
+ bool IsStrict = Op->isStrictFPOpcode();
// Convert the FP value to an int value through memory.
bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
- (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
+ (IsSigned || Subtarget.hasFPCVT());
SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
// Emit a store to the stack slot.
- SDValue Chain;
+ SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
if (i32Stack) {
MachineFunction &MF = DAG.getMachineFunction();
Alignment = Align(4);
MachineMemOperand *MMO =
MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
- SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
+ SDValue Ops[] = { Chain, Tmp, FIPtr };
Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
} else
- Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI, Alignment);
+ Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
// Result is a load from the stack slot. If loading 4 bytes, make sure to
// add in a bias on big endian.
@@ -8272,76 +7608,100 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
SelectionDAG &DAG,
const SDLoc &dl) const {
- assert(Op.getOperand(0).getValueType().isFloatingPoint());
- SDValue Src = Op.getOperand(0);
-
- if (Src.getValueType() == MVT::f32)
- Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
-
- SDValue Tmp;
- switch (Op.getSimpleValueType().SimpleTy) {
- default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
- case MVT::i32:
- Tmp = DAG.getNode(
- Op.getOpcode() == ISD::FP_TO_SINT
- ? PPCISD::FCTIWZ
- : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
- dl, MVT::f64, Src);
- Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
- break;
- case MVT::i64:
- assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
- "i64 FP_TO_UINT is supported only with FPCVT");
- Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
- PPCISD::FCTIDUZ,
- dl, MVT::f64, Src);
- Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
- break;
- }
- return Tmp;
+ SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
+ SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
+ if (Op->isStrictFPOpcode())
+ return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
+ else
+ return Mov;
}
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const {
+ bool IsStrict = Op->isStrictFPOpcode();
+ bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
+ Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
+ SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Op.getValueType();
// FP to INT conversions are legal for f128.
- if (Op->getOperand(0).getValueType() == MVT::f128)
- return Op;
+ if (SrcVT == MVT::f128)
+ return Subtarget.hasP9Vector() ? Op : SDValue();
// Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
// PPC (the libcall is not available).
- if (Op.getOperand(0).getValueType() == MVT::ppcf128) {
- if (Op.getValueType() == MVT::i32) {
- if (Op.getOpcode() == ISD::FP_TO_SINT) {
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
- MVT::f64, Op.getOperand(0),
+ if (SrcVT == MVT::ppcf128) {
+ if (DstVT == MVT::i32) {
+ // TODO: Conservatively pass only nofpexcept flag here. Need to check and
+ // set other fast-math flags to FP operations in both strict and
+ // non-strict cases. (FP_TO_SINT, FSUB)
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
+
+ if (IsSigned) {
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
DAG.getIntPtrConstant(0, dl));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
- MVT::f64, Op.getOperand(0),
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
DAG.getIntPtrConstant(1, dl));
- // Add the two halves of the long double in round-to-zero mode.
- SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
-
- // Now use a smaller FP_TO_SINT.
- return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
- }
- if (Op.getOpcode() == ISD::FP_TO_UINT) {
+ // Add the two halves of the long double in round-to-zero mode, and use
+ // a smaller FP_TO_SINT.
+ if (IsStrict) {
+ SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
+ DAG.getVTList(MVT::f64, MVT::Other),
+ {Op.getOperand(0), Lo, Hi}, Flags);
+ return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ {Res.getValue(1), Res}, Flags);
+ } else {
+ SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
+ return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+ }
+ } else {
const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
- SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
- // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
- // FIXME: generated code sucks.
- // TODO: Are there fast-math-flags to propagate to this FSUB?
- SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128,
- Op.getOperand(0), Tmp);
- True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
- True = DAG.getNode(ISD::ADD, dl, MVT::i32, True,
- DAG.getConstant(0x80000000, dl, MVT::i32));
- SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
- Op.getOperand(0));
- return DAG.getSelectCC(dl, Op.getOperand(0), Tmp, True, False,
- ISD::SETGE);
+ SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
+ SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
+ if (IsStrict) {
+ // Sel = Src < 0x80000000
+ // FltOfs = select Sel, 0.0, 0x80000000
+ // IntOfs = select Sel, 0, 0x80000000
+ // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
+ SDValue Chain = Op.getOperand(0);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+ EVT DstSetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
+ SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
+ Chain, true);
+ Chain = Sel.getValue(1);
+
+ SDValue FltOfs = DAG.getSelect(
+ dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
+ Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
+
+ SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
+ DAG.getVTList(SrcVT, MVT::Other),
+ {Chain, Src, FltOfs}, Flags);
+ Chain = Val.getValue(1);
+ SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
+ DAG.getVTList(DstVT, MVT::Other),
+ {Chain, Val}, Flags);
+ Chain = SInt.getValue(1);
+ SDValue IntOfs = DAG.getSelect(
+ dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
+ SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
+ return DAG.getMergeValues({Result, Chain}, dl);
+ } else {
+ // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
+ // FIXME: generated code sucks.
+ SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
+ True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
+ True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
+ SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
+ return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
+ }
}
}
@@ -8370,6 +7730,10 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
ReuseLoadInfo &RLI,
SelectionDAG &DAG,
ISD::LoadExtType ET) const {
+ // Conservatively skip reusing for constrained FP nodes.
+ if (Op->isStrictFPOpcode())
+ return false;
+
SDLoc dl(Op);
bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
(Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
@@ -8389,6 +7753,13 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
if (LD->getMemoryVT() != MemVT)
return false;
+ // If the result of the load is an illegal type, then we can't build a
+ // valid chain for reuse since the legalised loads and token factor node that
+ // ties the legalised loads together uses a different output chain then the
+ // illegal load.
+ if (!isTypeLegal(LD->getValueType(0)))
+ return false;
+
RLI.Ptr = LD->getBasePtr();
if (LD->isIndexed() && !LD->getOffset().isUndef()) {
assert(LD->getAddressingMode() == ISD::PRE_INC &&
@@ -8453,13 +7824,41 @@ bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
continue;
if (UI->getOpcode() != ISD::SINT_TO_FP &&
- UI->getOpcode() != ISD::UINT_TO_FP)
+ UI->getOpcode() != ISD::UINT_TO_FP &&
+ UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
+ UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
return true;
}
return false;
}
+static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget,
+ SDValue Chain = SDValue()) {
+ bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
+ SDLoc dl(Op);
+
+ // TODO: Any other flags to propagate?
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
+
+ // If we have FCFIDS, then use it when converting to single-precision.
+ // Otherwise, convert to double-precision and then round.
+ bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
+ unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
+ : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
+ EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
+ if (Op->isStrictFPOpcode()) {
+ if (!Chain)
+ Chain = Op.getOperand(0);
+ return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
+ DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
+ } else
+ return DAG.getNode(ConvOpc, dl, ConvTy, Src);
+}
+
/// Custom lowers integer to floating point conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
@@ -8471,25 +7870,13 @@ SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
"Invalid floating point type as target of conversion");
assert(Subtarget.hasFPCVT() &&
"Int to FP conversions with direct moves require FPCVT");
- SDValue FP;
- SDValue Src = Op.getOperand(0);
- bool SinglePrec = Op.getValueType() == MVT::f32;
+ SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
- bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
- unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
- (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
-
- if (WordInt) {
- FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
- dl, MVT::f64, Src);
- FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
- }
- else {
- FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
- FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
- }
-
- return FP;
+ bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
+ unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
+ SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
+ return convertIntToFP(Op, Mov, DAG, Subtarget);
}
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
@@ -8514,17 +7901,23 @@ static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const {
-
+ bool IsStrict = Op->isStrictFPOpcode();
unsigned Opc = Op.getOpcode();
- assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
+ SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
+ Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
"Unexpected conversion type");
assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
"Supports conversions to v2f64/v4f32 only.");
- bool SignedConv = Opc == ISD::SINT_TO_FP;
+ // TODO: Any other flags to propagate?
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
+
+ bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
bool FourEltRes = Op.getValueType() == MVT::v4f32;
- SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
+ SDValue Wide = widenVec(DAG, Src, dl);
EVT WideVT = Wide.getValueType();
unsigned WideNumElts = WideVT.getVectorNumElements();
MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
@@ -8549,7 +7942,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
SDValue Extend;
if (SignedConv) {
Arrange = DAG.getBitcast(IntermediateVT, Arrange);
- EVT ExtVT = Op.getOperand(0).getValueType();
+ EVT ExtVT = Src.getValueType();
if (Subtarget.hasP9Altivec())
ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
IntermediateVT.getVectorNumElements());
@@ -8559,14 +7952,27 @@ SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
} else
Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
+ if (IsStrict)
+ return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
+ {Op.getOperand(0), Extend}, Flags);
+
return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
+ bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
+
+ // TODO: Any other flags to propagate?
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
- EVT InVT = Op.getOperand(0).getValueType();
+ EVT InVT = Src.getValueType();
EVT OutVT = Op.getValueType();
if (OutVT.isVector() && OutVT.isFloatingPoint() &&
isOperationCustom(Op.getOpcode(), InVT))
@@ -8574,37 +7980,21 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// Conversions to f128 are legal.
if (Op.getValueType() == MVT::f128)
- return Op;
-
- if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
- if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
- return SDValue();
-
- SDValue Value = Op.getOperand(0);
- // The values are now known to be -1 (false) or 1 (true). To convert this
- // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
- // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
- Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
-
- SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
-
- Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
-
- if (Op.getValueType() != MVT::v4f64)
- Value = DAG.getNode(ISD::FP_ROUND, dl,
- Op.getValueType(), Value,
- DAG.getIntPtrConstant(1, dl));
- return Value;
- }
+ return Subtarget.hasP9Vector() ? Op : SDValue();
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
- if (Op.getOperand(0).getValueType() == MVT::i1)
- return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
- DAG.getConstantFP(1.0, dl, Op.getValueType()),
- DAG.getConstantFP(0.0, dl, Op.getValueType()));
+ if (Src.getValueType() == MVT::i1) {
+ SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
+ DAG.getConstantFP(1.0, dl, Op.getValueType()),
+ DAG.getConstantFP(0.0, dl, Op.getValueType()));
+ if (IsStrict)
+ return DAG.getMergeValues({Sel, Chain}, dl);
+ else
+ return Sel;
+ }
// If we have direct moves, we can do all the conversion, skip the store/load
// however, without FPCVT we can't do most conversions.
@@ -8612,22 +8002,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
Subtarget.isPPC64() && Subtarget.hasFPCVT())
return LowerINT_TO_FPDirectMove(Op, DAG, dl);
- assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
+ assert((IsSigned || Subtarget.hasFPCVT()) &&
"UINT_TO_FP is supported only with FPCVT");
- // If we have FCFIDS, then use it when converting to single-precision.
- // Otherwise, convert to double-precision and then round.
- unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
- ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
- : PPCISD::FCFIDS)
- : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
- : PPCISD::FCFID);
- MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
- ? MVT::f32
- : MVT::f64;
-
- if (Op.getOperand(0).getValueType() == MVT::i64) {
- SDValue SINT = Op.getOperand(0);
+ if (Src.getValueType() == MVT::i64) {
+ SDValue SINT = Src;
// When converting to single-precision, we actually need to convert
// to double-precision first and then round to single-precision.
// To avoid double-rounding effects during that operation, we have
@@ -8715,16 +8094,16 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- SDValue Store =
- DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
- MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(), FrameIdx));
+ SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FrameIdx));
+ Chain = Store;
assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
"Expected an i32 store");
RLI.Ptr = FIdx;
- RLI.Chain = Store;
+ RLI.Chain = Chain;
RLI.MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
RLI.Alignment = Align(4);
@@ -8737,18 +8116,27 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
PPCISD::LFIWZX : PPCISD::LFIWAX,
dl, DAG.getVTList(MVT::f64, MVT::Other),
Ops, MVT::i32, MMO);
+ Chain = Bits.getValue(1);
} else
Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
- SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
+ SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
+ if (IsStrict)
+ Chain = FP.getValue(1);
- if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
- FP = DAG.getNode(ISD::FP_ROUND, dl,
- MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
+ if (IsStrict)
+ FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
+ DAG.getVTList(MVT::f32, MVT::Other),
+ {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
+ else
+ FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
+ DAG.getIntPtrConstant(0, dl));
+ }
return FP;
}
- assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+ assert(Src.getValueType() == MVT::i32 &&
"Unhandled INT_TO_FP type in custom expander!");
// Since we only generate this in 64-bit mode, we can take advantage of
// 64-bit registers. In particular, sign extend the input value into the
@@ -8762,21 +8150,20 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
ReuseLoadInfo RLI;
bool ReusingLoad;
- if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
- DAG))) {
+ if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- SDValue Store =
- DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
- MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(), FrameIdx));
+ SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FrameIdx));
+ Chain = Store;
assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
"Expected an i32 store");
RLI.Ptr = FIdx;
- RLI.Chain = Store;
+ RLI.Chain = Chain;
RLI.MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
RLI.Alignment = Align(4);
@@ -8786,10 +8173,10 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
RLI.Alignment, RLI.AAInfo, RLI.Ranges);
SDValue Ops[] = { RLI.Chain, RLI.Ptr };
- Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
- PPCISD::LFIWZX : PPCISD::LFIWAX,
- dl, DAG.getVTList(MVT::f64, MVT::Other),
- Ops, MVT::i32, MMO);
+ Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
+ DAG.getVTList(MVT::f64, MVT::Other), Ops,
+ MVT::i32, MMO);
+ Chain = Ld.getValue(1);
if (ReusingLoad)
spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
} else {
@@ -8799,25 +8186,34 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
- Op.getOperand(0));
+ SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
// STD the extended value into the stack slot.
SDValue Store = DAG.getStore(
- DAG.getEntryNode(), dl, Ext64, FIdx,
+ Chain, dl, Ext64, FIdx,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
+ Chain = Store;
// Load the value as a double.
Ld = DAG.getLoad(
- MVT::f64, dl, Store, FIdx,
+ MVT::f64, dl, Chain, FIdx,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
+ Chain = Ld.getValue(1);
}
// FCFID it and return it.
- SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
- if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
- FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
- DAG.getIntPtrConstant(0, dl));
+ SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
+ if (IsStrict)
+ Chain = FP.getValue(1);
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
+ if (IsStrict)
+ FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
+ DAG.getVTList(MVT::f32, MVT::Other),
+ {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
+ else
+ FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
+ DAG.getIntPtrConstant(0, dl));
+ }
return FP;
}
@@ -8852,16 +8248,24 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
Chain = MFFS.getValue(1);
- // Save FP register to stack slot
- int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
- SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
- Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
+ SDValue CWD;
+ if (isTypeLegal(MVT::i64)) {
+ CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
+ } else {
+ // Save FP register to stack slot
+ int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+ Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
- // Load FP Control Word from low 32 bits of stack slot.
- SDValue Four = DAG.getConstant(4, dl, PtrVT);
- SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
- SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
- Chain = CWD.getValue(1);
+ // Load FP Control Word from low 32 bits of stack slot.
+ assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
+ "Stack slot adjustment is valid only on big endian subtargets!");
+ SDValue Four = DAG.getConstant(4, dl, PtrVT);
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
+ CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
+ Chain = CWD.getValue(1);
+ }
// Transform as necessary
SDValue CWD1 =
@@ -8972,6 +8376,31 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(OutOps, dl);
}
+SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+
+ bool IsFSHL = Op.getOpcode() == ISD::FSHL;
+ SDValue X = Op.getOperand(0);
+ SDValue Y = Op.getOperand(1);
+ SDValue Z = Op.getOperand(2);
+ EVT AmtVT = Z.getValueType();
+
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ // This is simpler than TargetLowering::expandFunnelShift because we can rely
+ // on PowerPC shift by BW being well defined.
+ Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
+ DAG.getConstant(BitWidth - 1, dl, AmtVT));
+ SDValue SubZ =
+ DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
+ X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
+ Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
+ return DAG.getNode(ISD::OR, dl, VT, X, Y);
+}
+
//===----------------------------------------------------------------------===//
// Vector related lowering.
//
@@ -8987,7 +8416,7 @@ static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
// For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
- if (Val == ((1LU << (SplatSize * 8)) - 1)) {
+ if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
SplatSize = 1;
Val = 0xFF;
}
@@ -9111,13 +8540,15 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
Op0.getOperand(1));
}
-static const SDValue *getNormalLoadInput(const SDValue &Op) {
+static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
const SDValue *InputLoad = &Op;
if (InputLoad->getOpcode() == ISD::BITCAST)
InputLoad = &InputLoad->getOperand(0);
if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
- InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED)
+ InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
+ IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
InputLoad = &InputLoad->getOperand(0);
+ }
if (InputLoad->getOpcode() != ISD::LOAD)
return nullptr;
LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
@@ -9163,110 +8594,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
- if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
- // We first build an i32 vector, load it into a QPX register,
- // then convert it to a floating-point vector and compare it
- // to a zero vector to get the boolean result.
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
- MachinePointerInfo PtrInfo =
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
-
- assert(BVN->getNumOperands() == 4 &&
- "BUILD_VECTOR for v4i1 does not have 4 operands");
-
- bool IsConst = true;
- for (unsigned i = 0; i < 4; ++i) {
- if (BVN->getOperand(i).isUndef()) continue;
- if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
- IsConst = false;
- break;
- }
- }
-
- if (IsConst) {
- Constant *One =
- ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
- Constant *NegOne =
- ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
-
- Constant *CV[4];
- for (unsigned i = 0; i < 4; ++i) {
- if (BVN->getOperand(i).isUndef())
- CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
- else if (isNullConstant(BVN->getOperand(i)))
- CV[i] = NegOne;
- else
- CV[i] = One;
- }
-
- Constant *CP = ConstantVector::get(CV);
- SDValue CPIdx =
- DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), Align(16));
-
- SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
- SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
- return DAG.getMemIntrinsicNode(
- PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
- }
-
- SmallVector<SDValue, 4> Stores;
- for (unsigned i = 0; i < 4; ++i) {
- if (BVN->getOperand(i).isUndef()) continue;
-
- unsigned Offset = 4*i;
- SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
- Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
-
- unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
- if (StoreSize > 4) {
- Stores.push_back(
- DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
- PtrInfo.getWithOffset(Offset), MVT::i32));
- } else {
- SDValue StoreValue = BVN->getOperand(i);
- if (StoreSize < 4)
- StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
-
- Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
- PtrInfo.getWithOffset(Offset)));
- }
- }
-
- SDValue StoreChain;
- if (!Stores.empty())
- StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
- else
- StoreChain = DAG.getEntryNode();
-
- // Now load from v4i32 into the QPX register; this will extend it to
- // v4i64 but not yet convert it to a floating point. Nevertheless, this
- // is typed as v4f64 because the QPX register integer states are not
- // explicitly represented.
-
- SDValue Ops[] = {StoreChain,
- DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
- FIdx};
- SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});
-
- SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
- dl, VTs, Ops, MVT::v4i32, PtrInfo);
- LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
- DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
- LoadedVect);
-
- SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);
-
- return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
- }
-
- // All other QPX vectors are handled by generic code.
- if (Subtarget.hasQPX())
- return SDValue();
-
// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;
unsigned SplatBitSize;
@@ -9277,19 +8604,48 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// If it is a splat of a double, check if we can shrink it to a 32 bit
// non-denormal float which when converted back to double gives us the same
- // double. This is to exploit the XXSPLTIDP instruction.
- if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
- (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
- convertToNonDenormSingle(APSplatBits)) {
- SDValue SplatNode = DAG.getNode(
- PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
- DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
- return DAG.getBitcast(Op.getValueType(), SplatNode);
+ // double. This is to exploit the XXSPLTIDP instruction.+ // If we lose precision, we use XXSPLTI32DX.
+ if (BVNIsConstantSplat && (SplatBitSize == 64) &&
+ Subtarget.hasPrefixInstrs()) {
+ if (convertToNonDenormSingle(APSplatBits) &&
+ (Op->getValueType(0) == MVT::v2f64)) {
+ SDValue SplatNode = DAG.getNode(
+ PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
+ DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
+ return DAG.getBitcast(Op.getValueType(), SplatNode);
+ } else { // We may lose precision, so we have to use XXSPLTI32DX.
+
+ uint32_t Hi =
+ (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
+ uint32_t Lo =
+ (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
+ SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
+
+ if (!Hi || !Lo)
+ // If either load is 0, then we should generate XXLXOR to set to 0.
+ SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
+
+ if (Hi)
+ SplatNode = DAG.getNode(
+ PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
+ DAG.getTargetConstant(0, dl, MVT::i32),
+ DAG.getTargetConstant(Hi, dl, MVT::i32));
+
+ if (Lo)
+ SplatNode =
+ DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
+ DAG.getTargetConstant(1, dl, MVT::i32),
+ DAG.getTargetConstant(Lo, dl, MVT::i32));
+
+ return DAG.getBitcast(Op.getValueType(), SplatNode);
+ }
}
if (!BVNIsConstantSplat || SplatBitSize > 32) {
- const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0));
+ bool IsPermutedLoad = false;
+ const SDValue *InputLoad =
+ getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
// Handle load-and-splat patterns as we have instructions that will do this
// in one go.
if (InputLoad && DAG.isSplatValue(Op, true)) {
@@ -9301,7 +8657,12 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// Checking for a single use of this load, we have to check for vector
// width (128 bits) / ElementSize uses (since each operand of the
// BUILD_VECTOR is a separate use of the value.
- if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) &&
+ unsigned NumUsesOfInputLD = 128 / ElementSize;
+ for (SDValue BVInOp : Op->ops())
+ if (BVInOp.isUndef())
+ NumUsesOfInputLD--;
+ assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
+ if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
((Subtarget.hasVSX() && ElementSize == 64) ||
(Subtarget.hasP9Vector() && ElementSize == 32))) {
SDValue Ops[] = {
@@ -9309,17 +8670,21 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
LD->getBasePtr(), // Ptr
DAG.getValueType(Op.getValueType()) // VT
};
- return
- DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl,
- DAG.getVTList(Op.getValueType(), MVT::Other),
- Ops, LD->getMemoryVT(), LD->getMemOperand());
+ SDValue LdSplt = DAG.getMemIntrinsicNode(
+ PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
+ // Replace all uses of the output chain of the original load with the
+ // output chain of the new load.
+ DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
+ LdSplt.getValue(1));
+ return LdSplt;
}
}
- // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
- // lowered to VSX instructions under certain conditions.
+ // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
+ // 32-bits can be lowered to VSX instructions under certain conditions.
// Without VSX, there is no pattern more efficient than expanding the node.
- if (Subtarget.hasVSX() &&
+ if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
Subtarget.hasP8Vector()))
return Op;
@@ -9348,7 +8713,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
// turned into a 4-byte splat of 0xABABABAB.
if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
- return getCanonicalConstSplat((SplatBits |= SplatBits << 16), SplatSize * 2,
+ return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
Op.getValueType(), DAG, dl);
if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
@@ -9444,17 +8809,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
- // vsplti + sra self.
- if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
- SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
- static const unsigned IIDs[] = { // Intrinsic to use for each size.
- Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
- Intrinsic::ppc_altivec_vsraw
- };
- Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
- }
-
// vsplti + rol self.
if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
@@ -9912,7 +9266,8 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// If this is a load-and-splat, we can do that with a single instruction
// in some cases. However if the load has multiple uses, we don't want to
// combine it because that will just produce multiple loads.
- const SDValue *InputLoad = getNormalLoadInput(V1);
+ bool IsPermutedLoad = false;
+ const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
(PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
InputLoad->hasOneUse()) {
@@ -9920,6 +9275,16 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
int SplatIdx =
PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
+ // The splat index for permuted loads will be in the left half of the vector
+ // which is strictly wider than the loaded value by 8 bytes. So we need to
+ // adjust the splat index to point to the correct address in memory.
+ if (IsPermutedLoad) {
+ assert(isLittleEndian && "Unexpected permuted load on big endian target");
+ SplatIdx += IsFourByte ? 2 : 1;
+ assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
+ "Splat of a value outside of the loaded memory");
+ }
+
LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
// For 4-byte load-and-splat, we need Power9.
if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
@@ -9929,10 +9294,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
else
Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
- // If we are loading a partial vector, it does not make sense to adjust
- // the base pointer. This happens with (splat (s_to_v_permuted (ld))).
- if (LD->getMemoryVT().getSizeInBits() == (IsFourByte ? 32 : 64))
- Offset = 0;
SDValue BasePtr = LD->getBasePtr();
if (Offset != 0)
BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
@@ -9947,6 +9308,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue LdSplt =
DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
Ops, LD->getMemoryVT(), LD->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
if (LdSplt.getValueType() != SVOp->getValueType(0))
LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
return LdSplt;
@@ -10050,42 +9412,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
}
}
- if (Subtarget.hasQPX()) {
- if (VT.getVectorNumElements() != 4)
- return SDValue();
-
- if (V2.isUndef()) V2 = V1;
-
- int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
- if (AlignIdx != -1) {
- return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
- DAG.getConstant(AlignIdx, dl, MVT::i32));
- } else if (SVOp->isSplat()) {
- int SplatIdx = SVOp->getSplatIndex();
- if (SplatIdx >= 4) {
- std::swap(V1, V2);
- SplatIdx -= 4;
- }
-
- return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
- DAG.getConstant(SplatIdx, dl, MVT::i32));
- }
-
- // Lower this into a qvgpci/qvfperm pair.
-
- // Compute the qvgpci literal
- unsigned idx = 0;
- for (unsigned i = 0; i < 4; ++i) {
- int m = SVOp->getMaskElt(i);
- unsigned mm = m >= 0 ? (unsigned) m : i;
- idx |= mm << (3-i)*3;
- }
-
- SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
- DAG.getConstant(idx, dl, MVT::i32));
- return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
- }
-
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
// selected by the instruction selector.
@@ -10347,6 +9673,26 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
return false;
break;
+ case Intrinsic::ppc_altivec_vcmpequq:
+ case Intrinsic::ppc_altivec_vcmpgtsq:
+ case Intrinsic::ppc_altivec_vcmpgtuq:
+ if (!Subtarget.isISA3_1())
+ return false;
+ switch (IntrinsicID) {
+ default:
+ llvm_unreachable("Unknown comparison intrinsic.");
+ case Intrinsic::ppc_altivec_vcmpequq:
+ CompareOpc = 455;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsq:
+ CompareOpc = 903;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuq:
+ CompareOpc = 647;
+ break;
+ }
+ break;
+
// VSX predicate comparisons use the same infrastructure
case Intrinsic::ppc_vsx_xvcmpeqdp_p:
case Intrinsic::ppc_vsx_xvcmpgedp_p:
@@ -10470,6 +9816,26 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
else
return false;
break;
+ case Intrinsic::ppc_altivec_vcmpequq_p:
+ case Intrinsic::ppc_altivec_vcmpgtsq_p:
+ case Intrinsic::ppc_altivec_vcmpgtuq_p:
+ if (!Subtarget.isISA3_1())
+ return false;
+ switch (IntrinsicID) {
+ default:
+ llvm_unreachable("Unknown comparison intrinsic.");
+ case Intrinsic::ppc_altivec_vcmpequq_p:
+ CompareOpc = 455;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsq_p:
+ CompareOpc = 903;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuq_p:
+ CompareOpc = 647;
+ break;
+ }
+ isDot = true;
+ break;
}
return true;
}
@@ -10483,11 +9849,32 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc dl(Op);
- if (IntrinsicID == Intrinsic::thread_pointer) {
+ switch (IntrinsicID) {
+ case Intrinsic::thread_pointer:
// Reads the thread pointer register, used for __builtin_thread_pointer.
if (Subtarget.isPPC64())
return DAG.getRegister(PPC::X13, MVT::i64);
return DAG.getRegister(PPC::R2, MVT::i32);
+
+ case Intrinsic::ppc_mma_disassemble_acc:
+ case Intrinsic::ppc_vsx_disassemble_pair: {
+ int NumVecs = 2;
+ SDValue WideVec = Op.getOperand(1);
+ if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
+ NumVecs = 4;
+ WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
+ }
+ SmallVector<SDValue, 4> RetOps;
+ for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
+ SDValue Extract = DAG.getNode(
+ PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
+ DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
+ : VecNo,
+ dl, MVT::i64));
+ RetOps.push_back(Extract);
+ }
+ return DAG.getMergeValues(RetOps, dl);
+ }
}
// If this is a lowered altivec predicate compare, CompareOpc is set to the
@@ -10512,7 +9899,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getConstant(CompareOpc, dl, MVT::i32)
};
EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
- SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
// Now that we have the comparison, emit a copy from the CR to a GPR.
// This is flagged to the above dot comparison.
@@ -10673,154 +10060,51 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
return Op;
}
-SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc dl(Op);
- SDNode *N = Op.getNode();
-
- assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
- "Unknown extract_vector_elt type");
-
- SDValue Value = N->getOperand(0);
-
- // The first part of this is like the store lowering except that we don't
- // need to track the chain.
-
- // The values are now known to be -1 (false) or 1 (true). To convert this
- // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
- // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
- Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
-
- // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
- // understand how to form the extending load.
- SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
-
- Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
-
- // Now convert to an integer and store.
- Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
- DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
- Value);
-
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
- MachinePointerInfo PtrInfo =
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
-
- SDValue StoreChain = DAG.getEntryNode();
- SDValue Ops[] = {StoreChain,
- DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
- Value, FIdx};
- SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
-
- StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
- dl, VTs, Ops, MVT::v4i32, PtrInfo);
-
- // Extract the value requested.
- unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
- SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
- Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
-
- SDValue IntVal =
- DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));
-
- if (!Subtarget.useCRBits())
- return IntVal;
-
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
-}
-
-/// Lowering for QPX v4i1 loads
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
SDValue LoadChain = LN->getChain();
SDValue BasePtr = LN->getBasePtr();
+ EVT VT = Op.getValueType();
- if (Op.getValueType() == MVT::v4f64 ||
- Op.getValueType() == MVT::v4f32) {
- EVT MemVT = LN->getMemoryVT();
- unsigned Alignment = LN->getAlignment();
-
- // If this load is properly aligned, then it is legal.
- if (Alignment >= MemVT.getStoreSize())
- return Op;
-
- EVT ScalarVT = Op.getValueType().getScalarType(),
- ScalarMemVT = MemVT.getScalarType();
- unsigned Stride = ScalarMemVT.getStoreSize();
-
- SDValue Vals[4], LoadChains[4];
- for (unsigned Idx = 0; Idx < 4; ++Idx) {
- SDValue Load;
- if (ScalarVT != ScalarMemVT)
- Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
- BasePtr,
- LN->getPointerInfo().getWithOffset(Idx * Stride),
- ScalarMemVT, MinAlign(Alignment, Idx * Stride),
- LN->getMemOperand()->getFlags(), LN->getAAInfo());
- else
- Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
- LN->getPointerInfo().getWithOffset(Idx * Stride),
- MinAlign(Alignment, Idx * Stride),
- LN->getMemOperand()->getFlags(), LN->getAAInfo());
-
- if (Idx == 0 && LN->isIndexed()) {
- assert(LN->getAddressingMode() == ISD::PRE_INC &&
- "Unknown addressing mode on vector load");
- Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
- LN->getAddressingMode());
- }
-
- Vals[Idx] = Load;
- LoadChains[Idx] = Load.getValue(1);
-
- BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Stride, dl,
- BasePtr.getValueType()));
- }
-
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
- SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);
-
- if (LN->isIndexed()) {
- SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
- return DAG.getMergeValues(RetOps, dl);
- }
-
- SDValue RetOps[] = { Value, TF };
- return DAG.getMergeValues(RetOps, dl);
- }
-
- assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
- assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
-
- // To lower v4i1 from a byte array, we load the byte elements of the
- // vector and then reuse the BUILD_VECTOR logic.
-
- SDValue VectElmts[4], VectElmtChains[4];
- for (unsigned i = 0; i < 4; ++i) {
- SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
- Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
-
- VectElmts[i] = DAG.getExtLoad(
- ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
- LN->getPointerInfo().getWithOffset(i), MVT::i8,
- /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
- VectElmtChains[i] = VectElmts[i].getValue(1);
- }
-
- LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
- SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);
+ if (VT != MVT::v256i1 && VT != MVT::v512i1)
+ return Op;
- SDValue RVals[] = { Value, LoadChain };
- return DAG.getMergeValues(RVals, dl);
+ // Type v256i1 is used for pairs and v512i1 is used for accumulators.
+ // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
+ // 2 or 4 vsx registers.
+ assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
+ "Type unsupported without MMA");
+ assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
+ "Type unsupported without paired vector support");
+ Align Alignment = LN->getAlign();
+ SmallVector<SDValue, 4> Loads;
+ SmallVector<SDValue, 4> LoadChains;
+ unsigned NumVecs = VT.getSizeInBits() / 128;
+ for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
+ SDValue Load =
+ DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
+ LN->getPointerInfo().getWithOffset(Idx * 16),
+ commonAlignment(Alignment, Idx * 16),
+ LN->getMemOperand()->getFlags(), LN->getAAInfo());
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(16, dl, BasePtr.getValueType()));
+ Loads.push_back(Load);
+ LoadChains.push_back(Load.getValue(1));
+ }
+ if (Subtarget.isLittleEndian()) {
+ std::reverse(Loads.begin(), Loads.end());
+ std::reverse(LoadChains.begin(), LoadChains.end());
+ }
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+ SDValue Value =
+ DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
+ dl, VT, Loads);
+ SDValue RetOps[] = {Value, TF};
+ return DAG.getMergeValues(RetOps, dl);
}
-/// Lowering for QPX v4i1 stores
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -10828,122 +10112,40 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SDValue StoreChain = SN->getChain();
SDValue BasePtr = SN->getBasePtr();
SDValue Value = SN->getValue();
+ EVT StoreVT = Value.getValueType();
- if (Value.getValueType() == MVT::v4f64 ||
- Value.getValueType() == MVT::v4f32) {
- EVT MemVT = SN->getMemoryVT();
- unsigned Alignment = SN->getAlignment();
-
- // If this store is properly aligned, then it is legal.
- if (Alignment >= MemVT.getStoreSize())
- return Op;
-
- EVT ScalarVT = Value.getValueType().getScalarType(),
- ScalarMemVT = MemVT.getScalarType();
- unsigned Stride = ScalarMemVT.getStoreSize();
-
- SDValue Stores[4];
- for (unsigned Idx = 0; Idx < 4; ++Idx) {
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
- DAG.getVectorIdxConstant(Idx, dl));
- SDValue Store;
- if (ScalarVT != ScalarMemVT)
- Store =
- DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
- SN->getPointerInfo().getWithOffset(Idx * Stride),
- ScalarMemVT, MinAlign(Alignment, Idx * Stride),
- SN->getMemOperand()->getFlags(), SN->getAAInfo());
- else
- Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
- SN->getPointerInfo().getWithOffset(Idx * Stride),
- MinAlign(Alignment, Idx * Stride),
- SN->getMemOperand()->getFlags(), SN->getAAInfo());
-
- if (Idx == 0 && SN->isIndexed()) {
- assert(SN->getAddressingMode() == ISD::PRE_INC &&
- "Unknown addressing mode on vector store");
- Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
- SN->getAddressingMode());
- }
-
- BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Stride, dl,
- BasePtr.getValueType()));
- Stores[Idx] = Store;
- }
-
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
-
- if (SN->isIndexed()) {
- SDValue RetOps[] = { TF, Stores[0].getValue(1) };
- return DAG.getMergeValues(RetOps, dl);
- }
-
- return TF;
- }
-
- assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
- assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
-
- // The values are now known to be -1 (false) or 1 (true). To convert this
- // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
- // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
- Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
-
- // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
- // understand how to form the extending load.
- SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
-
- Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
-
- // Now convert to an integer and store.
- Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
- DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
- Value);
-
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
- MachinePointerInfo PtrInfo =
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
-
- SDValue Ops[] = {StoreChain,
- DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
- Value, FIdx};
- SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
-
- StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
- dl, VTs, Ops, MVT::v4i32, PtrInfo);
-
- // Move data into the byte array.
- SDValue Loads[4], LoadChains[4];
- for (unsigned i = 0; i < 4; ++i) {
- unsigned Offset = 4*i;
- SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
- Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
-
- Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
- PtrInfo.getWithOffset(Offset));
- LoadChains[i] = Loads[i].getValue(1);
- }
-
- StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
-
- SDValue Stores[4];
- for (unsigned i = 0; i < 4; ++i) {
- SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
- Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
-
- Stores[i] = DAG.getTruncStore(
- StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
- MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
- SN->getAAInfo());
- }
-
- StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+ if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
+ return Op;
- return StoreChain;
+ // Type v256i1 is used for pairs and v512i1 is used for accumulators.
+ // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
+ // underlying registers individually.
+ assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
+ "Type unsupported without MMA");
+ assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
+ "Type unsupported without paired vector support");
+ Align Alignment = SN->getAlign();
+ SmallVector<SDValue, 4> Stores;
+ unsigned NumVecs = 2;
+ if (StoreVT == MVT::v512i1) {
+ Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
+ NumVecs = 4;
+ }
+ for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
+ unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
+ SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
+ DAG.getConstant(VecNum, dl, MVT::i64));
+ SDValue Store =
+ DAG.getStore(StoreChain, dl, Elt, BasePtr,
+ SN->getPointerInfo().getWithOffset(Idx * 16),
+ commonAlignment(Alignment, Idx * 16),
+ SN->getMemOperand()->getFlags(), SN->getAAInfo());
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(16, dl, BasePtr.getValueType()));
+ Stores.push_back(Store);
+ }
+ SDValue TF = DAG.getTokenFactor(dl, Stores);
+ return TF;
}
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
@@ -11010,42 +10212,13 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
}
}
-SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
-
- assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
-
- EVT VT = Op.getValueType();
- assert(VT.isVector() &&
- "Only set vector abs as custom, scalar abs shouldn't reach here!");
- assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
- VT == MVT::v16i8) &&
- "Unexpected vector element type!");
- assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
- "Current subtarget doesn't support smax v2i64!");
-
- // For vector abs, it can be lowered to:
- // abs x
- // ==>
- // y = -x
- // smax(x, y)
-
- SDLoc dl(Op);
- SDValue X = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0, dl, VT);
- SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
-
- // SMAX patch https://reviews.llvm.org/D47332
- // hasn't landed yet, so use intrinsic first here.
- // TODO: Should use SMAX directly once SMAX patch landed
- Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
- if (VT == MVT::v2i64)
- BifID = Intrinsic::ppc_altivec_vmaxsd;
- else if (VT == MVT::v8i16)
- BifID = Intrinsic::ppc_altivec_vmaxsh;
- else if (VT == MVT::v16i8)
- BifID = Intrinsic::ppc_altivec_vmaxsb;
+SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+ bool IsStrict = Op->isStrictFPOpcode();
+ if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
+ !Subtarget.hasP9Vector())
+ return SDValue();
- return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
+ return Op;
}
// Custom lowering for fpext vf32 to v2f64
@@ -11158,8 +10331,12 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::STRICT_SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
@@ -11169,16 +10346,20 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
+ case ISD::FSHL: return LowerFunnelShift(Op, DAG);
+ case ISD::FSHR: return LowerFunnelShift(Op, DAG);
+
// Vector-related lowering.
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
- case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
- case ISD::ABS: return LowerABS(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ISD::STRICT_FP_ROUND:
+ case ISD::FP_ROUND:
+ return LowerFP_ROUND(Op, DAG);
case ISD::ROTL: return LowerROTL(Op, DAG);
// For counter-based loop handling.
@@ -11246,23 +10427,28 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
// LowerFP_TO_INT() can only handle f32 and f64.
- if (N->getOperand(0).getValueType() == MVT::ppcf128)
+ if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
+ MVT::ppcf128)
return;
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;
case ISD::TRUNCATE: {
- EVT TrgVT = N->getValueType(0);
- EVT OpVT = N->getOperand(0).getValueType();
- if (TrgVT.isVector() &&
- isOperationCustom(N->getOpcode(), TrgVT) &&
- OpVT.getSizeInBits() <= 128 &&
- isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits()))
- Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
+ if (!N->getValueType(0).isVector())
+ return;
+ SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
+ if (Lowered)
+ Results.push_back(Lowered);
return;
}
+ case ISD::FSHL:
+ case ISD::FSHR:
+ // Don't handle funnel shifts here.
+ return;
case ISD::BITCAST:
// Don't handle bitcast here.
return;
@@ -11434,17 +10620,88 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
return BB;
}
+static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
+ switch(MI.getOpcode()) {
+ default:
+ return false;
+ case PPC::COPY:
+ return TII->isSignExtended(MI);
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHAU:
+ case PPC::LHAU8:
+ case PPC::LHAUX:
+ case PPC::LHAUX8:
+ case PPC::LHAX:
+ case PPC::LHAX8:
+ case PPC::LWA:
+ case PPC::LWAUX:
+ case PPC::LWAX:
+ case PPC::LWAX_32:
+ case PPC::LWA_32:
+ case PPC::PLHA:
+ case PPC::PLHA8:
+ case PPC::PLHA8pc:
+ case PPC::PLHApc:
+ case PPC::PLWA:
+ case PPC::PLWA8:
+ case PPC::PLWA8pc:
+ case PPC::PLWApc:
+ case PPC::EXTSB:
+ case PPC::EXTSB8:
+ case PPC::EXTSB8_32_64:
+ case PPC::EXTSB8_rec:
+ case PPC::EXTSB_rec:
+ case PPC::EXTSH:
+ case PPC::EXTSH8:
+ case PPC::EXTSH8_32_64:
+ case PPC::EXTSH8_rec:
+ case PPC::EXTSH_rec:
+ case PPC::EXTSW:
+ case PPC::EXTSWSLI:
+ case PPC::EXTSWSLI_32_64:
+ case PPC::EXTSWSLI_32_64_rec:
+ case PPC::EXTSWSLI_rec:
+ case PPC::EXTSW_32:
+ case PPC::EXTSW_32_64:
+ case PPC::EXTSW_32_64_rec:
+ case PPC::EXTSW_rec:
+ case PPC::SRAW:
+ case PPC::SRAWI:
+ case PPC::SRAWI_rec:
+ case PPC::SRAW_rec:
+ return true;
+ }
+ return false;
+}
+
MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
MachineInstr &MI, MachineBasicBlock *BB,
bool is8bit, // operation
unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const PPCInstrInfo *TII = Subtarget.getInstrInfo();
+
+ // If this is a signed comparison and the value being compared is not known
+ // to be sign extended, sign extend it here.
+ DebugLoc dl = MI.getDebugLoc();
+ MachineFunction *F = BB->getParent();
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ Register incr = MI.getOperand(3).getReg();
+ bool IsSignExtended = Register::isVirtualRegister(incr) &&
+ isSignExtended(*RegInfo.getVRegDef(incr), TII);
+
+ if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
+ Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+ BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
+ .addReg(MI.getOperand(3).getReg());
+ MI.getOperand(3).setReg(ValueReg);
+ }
// If we support part-word atomic mnemonics, just use them
if (Subtarget.hasPartwordAtomics())
return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
CmpPred);
- // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// In 64 bit mode we have to use 64 bits for addresses, even though the
// lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
// registers without caring whether they're 32 or 64, but here we're
@@ -11454,14 +10711,11 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *F = BB->getParent();
MachineFunction::iterator It = ++BB->getIterator();
Register dest = MI.getOperand(0).getReg();
Register ptrA = MI.getOperand(1).getReg();
Register ptrB = MI.getOperand(2).getReg();
- Register incr = MI.getOperand(3).getReg();
- DebugLoc dl = MI.getDebugLoc();
MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB =
@@ -11475,7 +10729,6 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- MachineRegisterInfo &RegInfo = F->getRegInfo();
const TargetRegisterClass *RC =
is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -11950,18 +11203,34 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
-
- // Get the canonical FinalStackPtr like what
- // PPCRegisterInfo::lowerDynamicAlloc does.
- BuildMI(*MBB, {MI}, DL,
- TII->get(isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64
- : PPC::PREPARE_PROBED_ALLOCA_32),
- FramePointer)
- .addDef(FinalStackPtr)
+ Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+
+ // Since value of NegSizeReg might be realigned in prologepilog, insert a
+ // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
+ // NegSize.
+ unsigned ProbeOpc;
+ if (!MRI.hasOneNonDBGUse(NegSizeReg))
+ ProbeOpc =
+ isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
+ else
+ // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
+ // and NegSizeReg will be allocated in the same phyreg to avoid
+ // redundant copy when NegSizeReg has only one use which is current MI and
+ // will be replaced by PREPARE_PROBED_ALLOCA then.
+ ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
+ : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
+ BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
+ .addDef(ActualNegSizeReg)
.addReg(NegSizeReg)
.add(MI.getOperand(2))
.add(MI.getOperand(3));
+ // Calculate final stack pointer, which equals to SP + ActualNegSize.
+ BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
+ FinalStackPtr)
+ .addReg(SPReg)
+ .addReg(ActualNegSizeReg);
+
// Materialize a scratch register for update.
int64_t NegProbeSize = -(int64_t)ProbeSize;
assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
@@ -11982,7 +11251,7 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
// Probing leading residual part.
Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
- .addReg(NegSizeReg)
+ .addReg(ActualNegSizeReg)
.addReg(ScratchReg);
Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
@@ -11991,7 +11260,7 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
.addReg(Mul)
- .addReg(NegSizeReg);
+ .addReg(ActualNegSizeReg);
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
.addReg(FramePointer)
.addReg(SPReg)
@@ -12102,9 +11371,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
} else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
MI.getOpcode() == PPC::SELECT_CC_F8 ||
MI.getOpcode() == PPC::SELECT_CC_F16 ||
- MI.getOpcode() == PPC::SELECT_CC_QFRC ||
- MI.getOpcode() == PPC::SELECT_CC_QSRC ||
- MI.getOpcode() == PPC::SELECT_CC_QBRC ||
MI.getOpcode() == PPC::SELECT_CC_VRRC ||
MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
@@ -12114,9 +11380,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == PPC::SELECT_F4 ||
MI.getOpcode() == PPC::SELECT_F8 ||
MI.getOpcode() == PPC::SELECT_F16 ||
- MI.getOpcode() == PPC::SELECT_QFRC ||
- MI.getOpcode() == PPC::SELECT_QSRC ||
- MI.getOpcode() == PPC::SELECT_QBRC ||
MI.getOpcode() == PPC::SELECT_SPE ||
MI.getOpcode() == PPC::SELECT_SPE4 ||
MI.getOpcode() == PPC::SELECT_VRRC ||
@@ -12154,9 +11417,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == PPC::SELECT_F16 ||
MI.getOpcode() == PPC::SELECT_SPE4 ||
MI.getOpcode() == PPC::SELECT_SPE ||
- MI.getOpcode() == PPC::SELECT_QFRC ||
- MI.getOpcode() == PPC::SELECT_QSRC ||
- MI.getOpcode() == PPC::SELECT_QBRC ||
MI.getOpcode() == PPC::SELECT_VRRC ||
MI.getOpcode() == PPC::SELECT_VSFRC ||
MI.getOpcode() == PPC::SELECT_VSSRC ||
@@ -12639,11 +11899,20 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
// Set rounding mode to round-to-zero.
- BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
- BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
+ .addImm(31)
+ .addReg(PPC::RM, RegState::ImplicitDefine);
+
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
+ .addImm(30)
+ .addReg(PPC::RM, RegState::ImplicitDefine);
// Perform addition.
- BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
+ auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
+ .addReg(Src1)
+ .addReg(Src2);
+ if (MI.getFlag(MachineInstr::NoFPExcept))
+ MIB.setMIFlag(MachineInstr::NoFPExcept);
// Restore FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
@@ -12702,10 +11971,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// the immediate to set the bits 62:63 of FPSCR.
unsigned Mode = MI.getOperand(1).getImm();
BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
- .addImm(31);
+ .addImm(31)
+ .addReg(PPC::RM, RegState::ImplicitDefine);
BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
- .addImm(30);
+ .addImm(30)
+ .addReg(PPC::RM, RegState::ImplicitDefine);
} else if (MI.getOpcode() == PPC::SETRND) {
DebugLoc dl = MI.getDebugLoc();
@@ -12815,6 +12086,20 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.addReg(NewFPSCRReg)
.addImm(0)
.addImm(0);
+ } else if (MI.getOpcode() == PPC::SETFLM) {
+ DebugLoc Dl = MI.getDebugLoc();
+
+ // Result of setflm is previous FPSCR content, so we need to save it first.
+ Register OldFPSCRReg = MI.getOperand(0).getReg();
+ BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
+
+ // Put bits in 32:63 to FPSCR.
+ Register NewFPSCRReg = MI.getOperand(1).getReg();
+ BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
+ .addImm(255)
+ .addReg(NewFPSCRReg)
+ .addImm(0)
+ .addImm(0);
} else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
return emitProbedAlloca(MI, BB);
@@ -12841,6 +12126,47 @@ static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
return RefinementSteps;
}
+SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
+ const DenormalMode &Mode) const {
+ // We only have VSX Vector Test for software Square Root.
+ EVT VT = Op.getValueType();
+ if (!isTypeLegal(MVT::i1) ||
+ (VT != MVT::f64 &&
+ ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
+ return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
+
+ SDLoc DL(Op);
+ // The output register of FTSQRT is CR field.
+ SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
+ // ftsqrt BF,FRB
+ // Let e_b be the unbiased exponent of the double-precision
+ // floating-point operand in register FRB.
+ // fe_flag is set to 1 if either of the following conditions occurs.
+ // - The double-precision floating-point operand in register FRB is a zero,
+ // a NaN, or an infinity, or a negative value.
+ // - e_b is less than or equal to -970.
+ // Otherwise fe_flag is set to 0.
+ // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
+ // not eligible for iteration. (zero/negative/infinity/nan or unbiased
+ // exponent is less than -970)
+ SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
+ return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
+ FTSQRT, SRIdxVal),
+ 0);
+}
+
+SDValue
+PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
+ SelectionDAG &DAG) const {
+ // We only have VSX Vector Square Root.
+ EVT VT = Op.getValueType();
+ if (VT != MVT::f64 &&
+ ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
+ return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
+
+ return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
+}
+
SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
int Enabled, int &RefinementSteps,
bool &UseOneConstNR,
@@ -12849,9 +12175,7 @@ SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
(VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
- (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
- (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
- (VT == MVT::v4f64 && Subtarget.hasQPX())) {
+ (VT == MVT::v2f64 && Subtarget.hasVSX())) {
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
@@ -12870,9 +12194,7 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
(VT == MVT::f64 && Subtarget.hasFRE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
- (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
- (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
- (VT == MVT::v4f64 && Subtarget.hasQPX())) {
+ (VT == MVT::v2f64 && Subtarget.hasVSX())) {
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
@@ -12970,24 +12292,6 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
EVT VT;
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default: return false;
- case Intrinsic::ppc_qpx_qvlfd:
- case Intrinsic::ppc_qpx_qvlfda:
- VT = MVT::v4f64;
- break;
- case Intrinsic::ppc_qpx_qvlfs:
- case Intrinsic::ppc_qpx_qvlfsa:
- VT = MVT::v4f32;
- break;
- case Intrinsic::ppc_qpx_qvlfcd:
- case Intrinsic::ppc_qpx_qvlfcda:
- VT = MVT::v2f64;
- break;
- case Intrinsic::ppc_qpx_qvlfcs:
- case Intrinsic::ppc_qpx_qvlfcsa:
- VT = MVT::v2f32;
- break;
- case Intrinsic::ppc_qpx_qvlfiwa:
- case Intrinsic::ppc_qpx_qvlfiwz:
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_vsx_lxvw4x:
@@ -13016,24 +12320,6 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
EVT VT;
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default: return false;
- case Intrinsic::ppc_qpx_qvstfd:
- case Intrinsic::ppc_qpx_qvstfda:
- VT = MVT::v4f64;
- break;
- case Intrinsic::ppc_qpx_qvstfs:
- case Intrinsic::ppc_qpx_qvstfsa:
- VT = MVT::v4f32;
- break;
- case Intrinsic::ppc_qpx_qvstfcd:
- case Intrinsic::ppc_qpx_qvstfcda:
- VT = MVT::v2f64;
- break;
- case Intrinsic::ppc_qpx_qvstfcs:
- case Intrinsic::ppc_qpx_qvstfcsa:
- VT = MVT::v2f32;
- break;
- case Intrinsic::ppc_qpx_qvstfiw:
- case Intrinsic::ppc_qpx_qvstfiwa:
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
case Intrinsic::ppc_vsx_stxvw4x:
@@ -13261,11 +12547,13 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
// We don't really care about what is known about the first bit (if
- // anything), so clear it in all masks prior to comparing them.
- Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
- Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
+ // anything), so pretend that it is known zero for both to ensure they can
+ // be compared as constants.
+ Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
+ Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
- if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
+ if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
+ Op1Known.getConstant() != Op2Known.getConstant())
return SDValue();
}
}
@@ -13317,8 +12605,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
// Visit all inputs, collect all binary operations (and, or, xor and
// select) that are all fed by extensions.
while (!BinOps.empty()) {
- SDValue BinOp = BinOps.back();
- BinOps.pop_back();
+ SDValue BinOp = BinOps.pop_back_val();
if (!Visited.insert(BinOp.getNode()).second)
continue;
@@ -13533,8 +12820,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
// Visit all inputs, collect all binary operations (and, or, xor and
// select) that are all fed by truncations.
while (!BinOps.empty()) {
- SDValue BinOp = BinOps.back();
- BinOps.pop_back();
+ SDValue BinOp = BinOps.pop_back_val();
if (!Visited.insert(BinOp.getNode()).second)
continue;
@@ -14131,6 +13417,46 @@ static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+// Look for the pattern of a load from a narrow width to i128, feeding
+// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
+// (LXVRZX). This node represents a zero extending load that will be matched
+// to the Load VSX Vector Rightmost instructions.
+static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+
+ // This combine is only eligible for a BUILD_VECTOR of v1i128.
+ if (N->getValueType(0) != MVT::v1i128)
+ return SDValue();
+
+ SDValue Operand = N->getOperand(0);
+ // Proceed with the transformation if the operand to the BUILD_VECTOR
+ // is a load instruction.
+ if (Operand.getOpcode() != ISD::LOAD)
+ return SDValue();
+
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
+ EVT MemoryType = LD->getMemoryVT();
+
+ // This transformation is only valid if the we are loading either a byte,
+ // halfword, word, or doubleword.
+ bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
+ MemoryType == MVT::i32 || MemoryType == MVT::i64;
+
+ // Ensure that the load from the narrow width is being zero extended to i128.
+ if (!ValidLDType ||
+ (LD->getExtensionType() != ISD::ZEXTLOAD &&
+ LD->getExtensionType() != ISD::EXTLOAD))
+ return SDValue();
+
+ SDValue LoadOps[] = {
+ LD->getChain(), LD->getBasePtr(),
+ DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
+
+ return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
+ DAG.getVTList(MVT::v1i128, MVT::Other),
+ LoadOps, MemoryType, LD->getMemOperand());
+}
+
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
DAGCombinerInfo &DCI) const {
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
@@ -14168,6 +13494,14 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
return Reduced;
}
+ // On Power10, the Load VSX Vector Rightmost instructions can be utilized
+ // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
+ // is a load from <valid narrow width> to i128.
+ if (Subtarget.isISA3_1()) {
+ SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
+ if (BVOfZLoad)
+ return BVOfZLoad;
+ }
if (N->getValueType(0) != MVT::v2f64)
return SDValue();
@@ -14231,6 +13565,8 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
// from the hardware.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
+ if (!Op.getOperand(0).getValueType().isSimple())
+ return SDValue();
if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
return SDValue();
@@ -14467,8 +13803,7 @@ SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
EVT Op1VT = N->getOperand(1).getValueType();
EVT ResVT = Val.getValueType();
- // Floating point types smaller than 32 bits are not legal on Power.
- if (ResVT.getScalarSizeInBits() < 32)
+ if (!isTypeLegal(ResVT))
return SDValue();
// Only perform combine for conversion to i64/i32 or power9 i16/i8.
@@ -14562,7 +13897,6 @@ static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
ShuffV[i] += HalfVec;
}
- return;
}
// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
@@ -15031,18 +14365,14 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
EVT MemVT = LD->getMemoryVT();
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
- Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
- Align ScalarABIAlignment = DAG.getDataLayout().getABITypeAlign(STy);
if (LD->isUnindexed() && VT.isVector() &&
((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
// P8 and later hardware should just use LOAD.
!Subtarget.hasP8Vector() &&
(VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
- VT == MVT::v4f32)) ||
- (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
- LD->getAlign() >= ScalarABIAlignment)) &&
+ VT == MVT::v4f32))) &&
LD->getAlign() < ABIAlignment) {
- // This is a type-legal unaligned Altivec or QPX load.
+ // This is a type-legal unaligned Altivec load.
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
bool isLittleEndian = Subtarget.isLittleEndian();
@@ -15073,24 +14403,13 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// optimization later.
Intrinsic::ID Intr, IntrLD, IntrPerm;
MVT PermCntlTy, PermTy, LDTy;
- if (Subtarget.hasAltivec()) {
- Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
- Intrinsic::ppc_altivec_lvsl;
- IntrLD = Intrinsic::ppc_altivec_lvx;
- IntrPerm = Intrinsic::ppc_altivec_vperm;
- PermCntlTy = MVT::v16i8;
- PermTy = MVT::v4i32;
- LDTy = MVT::v4i32;
- } else {
- Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
- Intrinsic::ppc_qpx_qvlpcls;
- IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
- Intrinsic::ppc_qpx_qvlfs;
- IntrPerm = Intrinsic::ppc_qpx_qvfperm;
- PermCntlTy = MVT::v4f64;
- PermTy = MVT::v4f64;
- LDTy = MemVT.getSimpleVT();
- }
+ Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
+ : Intrinsic::ppc_altivec_lvsl;
+ IntrLD = Intrinsic::ppc_altivec_lvx;
+ IntrPerm = Intrinsic::ppc_altivec_vperm;
+ PermCntlTy = MVT::v16i8;
+ PermTy = MVT::v4i32;
+ LDTy = MVT::v4i32;
SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
@@ -15161,10 +14480,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
BaseLoad, ExtraLoad, PermCntl, DAG, dl);
if (VT != PermTy)
- Perm = Subtarget.hasAltivec() ?
- DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
- DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
- DAG.getTargetConstant(1, dl, MVT::i64));
+ Perm = Subtarget.hasAltivec()
+ ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
+ : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
+ DAG.getTargetConstant(1, dl, MVT::i64));
// second argument is 1 because this rounding
// is always exact.
@@ -15180,14 +14499,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
: Intrinsic::ppc_altivec_lvsl);
- if ((IID == Intr ||
- IID == Intrinsic::ppc_qpx_qvlpcld ||
- IID == Intrinsic::ppc_qpx_qvlpcls) &&
- N->getOperand(1)->getOpcode() == ISD::ADD) {
+ if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
SDValue Add = N->getOperand(1);
- int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
- 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
+ int Bits = 4 /* 16 byte alignment */;
if (DAG.MaskedValueIsZero(Add->getOperand(1),
APInt::getAllOnesValue(Bits /* alignment */)
@@ -15197,7 +14512,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
UE = BasePtr->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
- cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
+ cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
+ IID) {
// We've found another LVSL/LVSR, and this address is an aligned
// multiple of that one. The results will be the same, so use the
// one we've just found instead.
@@ -15329,43 +14645,43 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
case PPCISD::VCMP:
- // If a VCMPo node already exists with exactly the same operands as this
- // node, use its result instead of this node (VCMPo computes both a CR6 and
- // a normal output).
+ // If a VCMP_rec node already exists with exactly the same operands as this
+ // node, use its result instead of this node (VCMP_rec computes both a CR6
+ // and a normal output).
//
if (!N->getOperand(0).hasOneUse() &&
!N->getOperand(1).hasOneUse() &&
!N->getOperand(2).hasOneUse()) {
- // Scan all of the users of the LHS, looking for VCMPo's that match.
- SDNode *VCMPoNode = nullptr;
+ // Scan all of the users of the LHS, looking for VCMP_rec's that match.
+ SDNode *VCMPrecNode = nullptr;
SDNode *LHSN = N->getOperand(0).getNode();
for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
UI != E; ++UI)
- if (UI->getOpcode() == PPCISD::VCMPo &&
+ if (UI->getOpcode() == PPCISD::VCMP_rec &&
UI->getOperand(1) == N->getOperand(1) &&
UI->getOperand(2) == N->getOperand(2) &&
UI->getOperand(0) == N->getOperand(0)) {
- VCMPoNode = *UI;
+ VCMPrecNode = *UI;
break;
}
- // If there is no VCMPo node, or if the flag value has a single use, don't
- // transform this.
- if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
+ // If there is no VCMP_rec node, or if the flag value has a single use,
+ // don't transform this.
+ if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
break;
// Look at the (necessarily single) use of the flag value. If it has a
// chain, this transformation is more complex. Note that multiple things
// could use the value result, which we should ignore.
SDNode *FlagUser = nullptr;
- for (SDNode::use_iterator UI = VCMPoNode->use_begin();
+ for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
FlagUser == nullptr; ++UI) {
- assert(UI != VCMPoNode->use_end() && "Didn't find user!");
+ assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
SDNode *User = *UI;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
- if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
+ if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
FlagUser = User;
break;
}
@@ -15375,7 +14691,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// If the user is a MFOCRF instruction, we know this is safe.
// Otherwise we give up for right now.
if (FlagUser->getOpcode() == PPCISD::MFOCRF)
- return SDValue(VCMPoNode, 0);
+ return SDValue(VCMPrecNode, 0);
}
break;
case ISD::BRCOND: {
@@ -15464,7 +14780,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAG.getConstant(CompareOpc, dl, MVT::i32)
};
EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
- SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
// Unpack the result based on how the target uses it.
PPC::Predicate CompOpc;
@@ -15559,16 +14875,19 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case Intrinsic::ppc_altivec_vcmpequh_p:
case Intrinsic::ppc_altivec_vcmpequw_p:
case Intrinsic::ppc_altivec_vcmpequd_p:
+ case Intrinsic::ppc_altivec_vcmpequq_p:
case Intrinsic::ppc_altivec_vcmpgefp_p:
case Intrinsic::ppc_altivec_vcmpgtfp_p:
case Intrinsic::ppc_altivec_vcmpgtsb_p:
case Intrinsic::ppc_altivec_vcmpgtsh_p:
case Intrinsic::ppc_altivec_vcmpgtsw_p:
case Intrinsic::ppc_altivec_vcmpgtsd_p:
+ case Intrinsic::ppc_altivec_vcmpgtsq_p:
case Intrinsic::ppc_altivec_vcmpgtub_p:
case Intrinsic::ppc_altivec_vcmpgtuh_p:
case Intrinsic::ppc_altivec_vcmpgtuw_p:
case Intrinsic::ppc_altivec_vcmpgtud_p:
+ case Intrinsic::ppc_altivec_vcmpgtuq_p:
Known.Zero = ~1U; // All bits but the low one are known to be zero.
break;
}
@@ -15746,17 +15065,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &PPC::F4RCRegClass);
if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &PPC::F8RCRegClass);
- if (VT == MVT::v4f64 && Subtarget.hasQPX())
- return std::make_pair(0U, &PPC::QFRCRegClass);
- if (VT == MVT::v4f32 && Subtarget.hasQPX())
- return std::make_pair(0U, &PPC::QSRCRegClass);
}
break;
case 'v':
- if (VT == MVT::v4f64 && Subtarget.hasQPX())
- return std::make_pair(0U, &PPC::QFRCRegClass);
- if (VT == MVT::v4f32 && Subtarget.hasQPX())
- return std::make_pair(0U, &PPC::QSRCRegClass);
if (Subtarget.hasAltivec())
return std::make_pair(0U, &PPC::VRRCRegClass);
break;
@@ -15892,9 +15203,15 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS, Instruction *I) const {
- // PPC does not allow r+i addressing modes for vectors!
- if (Ty->isVectorTy() && AM.BaseOffs != 0)
+ unsigned AS,
+ Instruction *I) const {
+ // Vector type r+i form is supported since power9 as DQ form. We don't check
+ // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
+ // imm form is preferred and the offset can be adjusted to use imm form later
+ // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
+ // max offset to check legal addressing mode, we should be a little aggressive
+ // to contain other offsets for that LSRUse.
+ if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
return false;
// PPC allows a sign-extended 16-bit immediate field.
@@ -16048,19 +15365,17 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
MachineFunction &MF,
unsigned Intrinsic) const {
switch (Intrinsic) {
- case Intrinsic::ppc_qpx_qvlfd:
- case Intrinsic::ppc_qpx_qvlfs:
- case Intrinsic::ppc_qpx_qvlfcd:
- case Intrinsic::ppc_qpx_qvlfcs:
- case Intrinsic::ppc_qpx_qvlfiwa:
- case Intrinsic::ppc_qpx_qvlfiwz:
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_altivec_lvebx:
case Intrinsic::ppc_altivec_lvehx:
case Intrinsic::ppc_altivec_lvewx:
case Intrinsic::ppc_vsx_lxvd2x:
- case Intrinsic::ppc_vsx_lxvw4x: {
+ case Intrinsic::ppc_vsx_lxvw4x:
+ case Intrinsic::ppc_vsx_lxvd2x_be:
+ case Intrinsic::ppc_vsx_lxvw4x_be:
+ case Intrinsic::ppc_vsx_lxvl:
+ case Intrinsic::ppc_vsx_lxvll: {
EVT VT;
switch (Intrinsic) {
case Intrinsic::ppc_altivec_lvebx:
@@ -16073,20 +15388,9 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
VT = MVT::i32;
break;
case Intrinsic::ppc_vsx_lxvd2x:
+ case Intrinsic::ppc_vsx_lxvd2x_be:
VT = MVT::v2f64;
break;
- case Intrinsic::ppc_qpx_qvlfd:
- VT = MVT::v4f64;
- break;
- case Intrinsic::ppc_qpx_qvlfs:
- VT = MVT::v4f32;
- break;
- case Intrinsic::ppc_qpx_qvlfcd:
- VT = MVT::v2f64;
- break;
- case Intrinsic::ppc_qpx_qvlfcs:
- VT = MVT::v2f32;
- break;
default:
VT = MVT::v4i32;
break;
@@ -16101,52 +15405,17 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MOLoad;
return true;
}
- case Intrinsic::ppc_qpx_qvlfda:
- case Intrinsic::ppc_qpx_qvlfsa:
- case Intrinsic::ppc_qpx_qvlfcda:
- case Intrinsic::ppc_qpx_qvlfcsa:
- case Intrinsic::ppc_qpx_qvlfiwaa:
- case Intrinsic::ppc_qpx_qvlfiwza: {
- EVT VT;
- switch (Intrinsic) {
- case Intrinsic::ppc_qpx_qvlfda:
- VT = MVT::v4f64;
- break;
- case Intrinsic::ppc_qpx_qvlfsa:
- VT = MVT::v4f32;
- break;
- case Intrinsic::ppc_qpx_qvlfcda:
- VT = MVT::v2f64;
- break;
- case Intrinsic::ppc_qpx_qvlfcsa:
- VT = MVT::v2f32;
- break;
- default:
- VT = MVT::v4i32;
- break;
- }
-
- Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = VT;
- Info.ptrVal = I.getArgOperand(0);
- Info.offset = 0;
- Info.size = VT.getStoreSize();
- Info.align = Align(1);
- Info.flags = MachineMemOperand::MOLoad;
- return true;
- }
- case Intrinsic::ppc_qpx_qvstfd:
- case Intrinsic::ppc_qpx_qvstfs:
- case Intrinsic::ppc_qpx_qvstfcd:
- case Intrinsic::ppc_qpx_qvstfcs:
- case Intrinsic::ppc_qpx_qvstfiw:
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
case Intrinsic::ppc_altivec_stvebx:
case Intrinsic::ppc_altivec_stvehx:
case Intrinsic::ppc_altivec_stvewx:
case Intrinsic::ppc_vsx_stxvd2x:
- case Intrinsic::ppc_vsx_stxvw4x: {
+ case Intrinsic::ppc_vsx_stxvw4x:
+ case Intrinsic::ppc_vsx_stxvd2x_be:
+ case Intrinsic::ppc_vsx_stxvw4x_be:
+ case Intrinsic::ppc_vsx_stxvl:
+ case Intrinsic::ppc_vsx_stxvll: {
EVT VT;
switch (Intrinsic) {
case Intrinsic::ppc_altivec_stvebx:
@@ -16159,20 +15428,9 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
VT = MVT::i32;
break;
case Intrinsic::ppc_vsx_stxvd2x:
+ case Intrinsic::ppc_vsx_stxvd2x_be:
VT = MVT::v2f64;
break;
- case Intrinsic::ppc_qpx_qvstfd:
- VT = MVT::v4f64;
- break;
- case Intrinsic::ppc_qpx_qvstfs:
- VT = MVT::v4f32;
- break;
- case Intrinsic::ppc_qpx_qvstfcd:
- VT = MVT::v2f64;
- break;
- case Intrinsic::ppc_qpx_qvstfcs:
- VT = MVT::v2f32;
- break;
default:
VT = MVT::v4i32;
break;
@@ -16187,39 +15445,6 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MOStore;
return true;
}
- case Intrinsic::ppc_qpx_qvstfda:
- case Intrinsic::ppc_qpx_qvstfsa:
- case Intrinsic::ppc_qpx_qvstfcda:
- case Intrinsic::ppc_qpx_qvstfcsa:
- case Intrinsic::ppc_qpx_qvstfiwa: {
- EVT VT;
- switch (Intrinsic) {
- case Intrinsic::ppc_qpx_qvstfda:
- VT = MVT::v4f64;
- break;
- case Intrinsic::ppc_qpx_qvstfsa:
- VT = MVT::v4f32;
- break;
- case Intrinsic::ppc_qpx_qvstfcda:
- VT = MVT::v2f64;
- break;
- case Intrinsic::ppc_qpx_qvstfcsa:
- VT = MVT::v2f32;
- break;
- default:
- VT = MVT::v4i32;
- break;
- }
-
- Info.opc = ISD::INTRINSIC_VOID;
- Info.memVT = VT;
- Info.ptrVal = I.getArgOperand(1);
- Info.offset = 0;
- Info.size = VT.getStoreSize();
- Info.align = Align(1);
- Info.flags = MachineMemOperand::MOStore;
- return true;
- }
default:
break;
}
@@ -16232,14 +15457,6 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
EVT PPCTargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
- // When expanding a memset, require at least two QPX instructions to cover
- // the cost of loading the value to be stored from the constant pool.
- if (Subtarget.hasQPX() && Op.size() >= 32 &&
- (Op.isMemcpy() || Op.size() >= 64) && Op.isAligned(Align(32)) &&
- !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
- return MVT::v4f64;
- }
-
// We should use Altivec/VSX loads and stores when available. For unaligned
// addresses, unaligned VSX loads are only fast starting with the P8.
if (Subtarget.hasAltivec() && Op.size() >= 16 &&
@@ -16358,6 +15575,33 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return true;
}
+bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
+ SDValue C) const {
+ // Check integral scalar types.
+ if (!VT.isScalarInteger())
+ return false;
+ if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
+ if (!ConstNode->getAPIntValue().isSignedIntN(64))
+ return false;
+ // This transformation will generate >= 2 operations. But the following
+ // cases will generate <= 2 instructions during ISEL. So exclude them.
+ // 1. If the constant multiplier fits 16 bits, it can be handled by one
+ // HW instruction, ie. MULLI
+ // 2. If the multiplier after shifted fits 16 bits, an extra shift
+ // instruction is needed than case 1, ie. MULLI and RLDICR
+ int64_t Imm = ConstNode->getSExtValue();
+ unsigned Shift = countTrailingZeros<uint64_t>(Imm);
+ Imm >>= Shift;
+ if (isInt<16>(Imm))
+ return false;
+ uint64_t UImm = static_cast<uint64_t>(Imm);
+ if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
+ isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
+ return true;
+ }
+ return false;
+}
+
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const {
return isFMAFasterThanFMulAndFAdd(
@@ -16377,31 +15621,56 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
}
}
-// Currently this is a copy from AArch64TargetLowering::isProfitableToHoist.
-// FIXME: add more patterns which are profitable to hoist.
+// FIXME: add more patterns which are not profitable to hoist.
bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
- if (I->getOpcode() != Instruction::FMul)
- return true;
-
if (!I->hasOneUse())
return true;
Instruction *User = I->user_back();
assert(User && "A single use instruction with no uses.");
- if (User->getOpcode() != Instruction::FSub &&
- User->getOpcode() != Instruction::FAdd)
- return true;
+ switch (I->getOpcode()) {
+ case Instruction::FMul: {
+ // Don't break FMA, PowerPC prefers FMA.
+ if (User->getOpcode() != Instruction::FSub &&
+ User->getOpcode() != Instruction::FAdd)
+ return true;
- const TargetOptions &Options = getTargetMachine().Options;
- const Function *F = I->getFunction();
- const DataLayout &DL = F->getParent()->getDataLayout();
- Type *Ty = User->getOperand(0)->getType();
+ const TargetOptions &Options = getTargetMachine().Options;
+ const Function *F = I->getFunction();
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ Type *Ty = User->getOperand(0)->getType();
+
+ return !(
+ isFMAFasterThanFMulAndFAdd(*F, Ty) &&
+ isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
+ }
+ case Instruction::Load: {
+ // Don't break "store (load float*)" pattern, this pattern will be combined
+ // to "store (load int32)" in later InstCombine pass. See function
+ // combineLoadToOperationType. On PowerPC, loading a float point takes more
+ // cycles than loading a 32 bit integer.
+ LoadInst *LI = cast<LoadInst>(I);
+ // For the loads that combineLoadToOperationType does nothing, like
+ // ordered load, it should be profitable to hoist them.
+ // For swifterror load, it can only be used for pointer to pointer type, so
+ // later type check should get rid of this case.
+ if (!LI->isUnordered())
+ return true;
+
+ if (User->getOpcode() != Instruction::Store)
+ return true;
+
+ if (I->getType()->getTypeID() != Type::FloatTyID)
+ return true;
- return !(
- isFMAFasterThanFMulAndFAdd(*F, Ty) &&
- isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
+ return false;
+ }
+ default:
+ return true;
+ }
+ return true;
}
const MCPhysReg *
@@ -16433,7 +15702,7 @@ PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
if (VT == MVT::v2i64)
return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
- if (Subtarget.hasVSX() || Subtarget.hasQPX())
+ if (Subtarget.hasVSX())
return true;
return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
@@ -16479,8 +15748,7 @@ SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
switch (Opc) {
case PPCISD::FNMSUB:
- // TODO: QPX subtarget is deprecated. No transformation here.
- if (!Op.hasOneUse() || !isTypeLegal(VT) || Subtarget.hasQPX())
+ if (!Op.hasOneUse() || !isTypeLegal(VT))
break;
const TargetOptions &Options = getTargetMachine().Options;
@@ -16609,10 +15877,10 @@ SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
SDValue N0 = N->getOperand(0);
ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!Subtarget.isISA3_0() ||
+ if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
N0.getOpcode() != ISD::SIGN_EXTEND ||
- N0.getOperand(0).getValueType() != MVT::i32 ||
- CN1 == nullptr || N->getValueType(0) != MVT::i64)
+ N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
+ N->getValueType(0) != MVT::i64)
return SDValue();
// We can't save an operation here if the value is already extended, and
@@ -16961,8 +16229,7 @@ SDValue PPCTargetLowering::combineFMALike(SDNode *N,
bool LegalOps = !DCI.isBeforeLegalizeOps();
SDLoc Loc(N);
- // TODO: QPX subtarget is deprecated. No transformation here.
- if (Subtarget.hasQPX() || !isOperationLegal(ISD::FMA, VT))
+ if (!isOperationLegal(ISD::FMA, VT))
return SDValue();
// Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 768eaa43e013..477105bd03ac 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -89,6 +89,12 @@ namespace llvm {
FRE,
FRSQRTE,
+ /// Test instruction for software square root.
+ FTSQRT,
+
+ /// Square root instruction.
+ FSQRT,
+
/// VPERM - The PPC VPERM Instruction.
///
VPERM,
@@ -146,8 +152,7 @@ namespace llvm {
/// probed.
PROBED_ALLOCA,
- /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
- /// at function entry, used for PIC code.
+ /// The result of the mflr at function entry, used for PIC code.
GlobalBaseReg,
/// These nodes represent PPC shifts.
@@ -265,11 +270,11 @@ namespace llvm {
/// is VCMPGTSH.
VCMP,
- /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
- /// altivec VCMP*o instructions. For lack of better number, we use the
+ /// RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the
+ /// altivec VCMP*_rec instructions. For lack of better number, we use the
/// opcode number encoding for the OPC field to identify the compare. For
/// example, 838 is VCMPGTSH.
- VCMPo,
+ VCMP_rec,
/// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
/// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
@@ -381,6 +386,10 @@ namespace llvm {
/// sym\@got\@dtprel\@l.
ADDI_DTPREL_L,
+ /// G8RC = PADDI_DTPREL %x3, Symbol - For the pc-rel based local-dynamic TLS
+ /// model, produces a PADDI8 instruction that adds X3 to sym\@dtprel.
+ PADDI_DTPREL,
+
/// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
/// during instruction selection to optimize a BUILD_VECTOR into
/// operations on splats. This is necessary to avoid losing these
@@ -427,22 +436,6 @@ namespace llvm {
/// => VABSDUW((XVNEGSP a), (XVNEGSP b))
VABSD,
- /// QVFPERM = This corresponds to the QPX qvfperm instruction.
- QVFPERM,
-
- /// QVGPCI = This corresponds to the QPX qvgpci instruction.
- QVGPCI,
-
- /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
- QVALIGNI,
-
- /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
- QVESPLATI,
-
- /// QBFLT = Access the underlying QPX floating-point boolean
- /// representation.
- QBFLT,
-
/// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
/// lower (IDX=1) half of v4f32 to v2f64.
FP_EXTEND_HALF,
@@ -452,6 +445,46 @@ namespace llvm {
/// PLD.
MAT_PCREL_ADDR,
+ /// TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for
+ /// TLS global address when using dynamic access models. This can be done
+ /// through an add like PADDI.
+ TLS_DYNAMIC_MAT_PCREL_ADDR,
+
+ /// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address
+ /// when using local exec access models, and when prefixed instructions are
+ /// available. This is used with ADD_TLS to produce an add like PADDI.
+ TLS_LOCAL_EXEC_MAT_ADDR,
+
+ /// ACC_BUILD = Build an accumulator register from 4 VSX registers.
+ ACC_BUILD,
+
+ /// PAIR_BUILD = Build a vector pair register from 2 VSX registers.
+ PAIR_BUILD,
+
+ /// EXTRACT_VSX_REG = Extract one of the underlying vsx registers of
+ /// an accumulator or pair register. This node is needed because
+ /// EXTRACT_SUBVECTOR expects the input and output vectors to have the same
+ /// element type.
+ EXTRACT_VSX_REG,
+
+ /// XXMFACC = This corresponds to the xxmfacc instruction.
+ XXMFACC,
+
+ // Constrained conversion from floating point to int
+ STRICT_FCTIDZ = ISD::FIRST_TARGET_STRICTFP_OPCODE,
+ STRICT_FCTIWZ,
+ STRICT_FCTIDUZ,
+ STRICT_FCTIWUZ,
+
+ /// Constrained integer-to-floating-point conversion instructions.
+ STRICT_FCFID,
+ STRICT_FCFIDU,
+ STRICT_FCFIDS,
+ STRICT_FCFIDUS,
+
+ /// Constrained floating point add in round-to-zero mode.
+ STRICT_FADDRTZ,
+
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
@@ -493,6 +526,12 @@ namespace llvm {
/// an xxswapd.
LXVD2X,
+ /// LXVRZX - Load VSX Vector Rightmost and Zero Extend
+ /// This node represents v1i128 BUILD_VECTOR of a zero extending load
+ /// instruction from <byte, halfword, word, or doubleword> to i128.
+ /// Allows utilization of the Load VSX Vector Rightmost Instructions.
+ LXVRZX,
+
/// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
/// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
/// the vector type to load vector in big-endian element order.
@@ -519,10 +558,6 @@ namespace llvm {
/// Store scalar integers from VSR.
ST_VSR_SCAL_INT,
- /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
- /// The 4xf32 load used for v4i1 constants.
- QVLFSb,
-
/// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
/// except they ensure that the compare input is zero-extended for
/// sub-word versions because the atomic loads zero-extend.
@@ -627,10 +662,6 @@ namespace llvm {
/// the number of bytes of each element [124] -> [bhw].
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
- /// If this is a qvaligni shuffle mask, return the shift
- /// amount, otherwise return -1.
- int isQVALIGNIShuffleMask(SDNode *N);
-
} // end namespace PPC
class PPCTargetLowering : public TargetLowering {
@@ -740,6 +771,8 @@ namespace llvm {
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
SelectionDAG &DAG,
MaybeAlign EncodingAlignment) const;
+ bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base,
+ SelectionDAG &DAG) const;
/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.
@@ -895,6 +928,9 @@ namespace llvm {
return true;
}
+ bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
+ SDValue C) const override;
+
bool isDesirableToTransformToIntegerOp(unsigned Opc,
EVT VT) const override {
// Only handle float load/store pair because float(fpr) load/store
@@ -980,11 +1016,6 @@ namespace llvm {
Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
- /// isMulhCheaperThanMulShift - Return true if a mulh[s|u] node for a
- /// specific type is cheaper than a multiply followed by a shift.
- /// This is true for words and doublewords on 64-bit PowerPC.
- bool isMulhCheaperThanMulShift(EVT Type) const override;
-
/// Override to support customized stack guard loading.
bool useLoadStackGuardNode() const override;
void insertSSPDeclarations(Module &M) const override;
@@ -1042,11 +1073,6 @@ namespace llvm {
}
};
- bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
- // Addrspacecasts are always noops.
- return true;
- }
-
bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
SelectionDAG &DAG,
ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
@@ -1117,19 +1143,18 @@ namespace llvm {
SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
@@ -1176,10 +1201,6 @@ namespace llvm {
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
- SDValue LowerFormalArguments_Darwin(
- SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerFormalArguments_64SVR4(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -1194,13 +1215,6 @@ namespace llvm {
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
const SDLoc &dl) const;
- SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee, CallFlags CFlags,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- const SDLoc &dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals,
- const CallBase *CB) const;
SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
@@ -1257,6 +1271,10 @@ namespace llvm {
bool Reciprocal) const override;
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &RefinementSteps) const override;
+ SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
+ const DenormalMode &Mode) const override;
+ SDValue getSqrtResultForDenormInput(SDValue Operand,
+ SelectionDAG &DAG) const override;
unsigned combineRepeatedFPDivisors() const override;
SDValue
@@ -1295,6 +1313,8 @@ namespace llvm {
bool isIntS16Immediate(SDNode *N, int16_t &Imm);
bool isIntS16Immediate(SDValue Op, int16_t &Imm);
+ bool isIntS34Immediate(SDNode *N, int64_t &Imm);
+ bool isIntS34Immediate(SDValue Op, int64_t &Imm);
bool convertToNonDenormSingle(APInt &ArgAPInt);
bool convertToNonDenormSingle(APFloat &ArgAPFloat);
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 1c457d4170d5..03e9d6970a30 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -19,12 +19,14 @@ def s16imm64 : Operand<i64> {
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS16ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def u16imm64 : Operand<i64> {
let PrintMethod = "printU16ImmOperand";
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCU16ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def s17imm64 : Operand<i64> {
// This operand type is used for addis/lis to allow the assembler parser
@@ -34,6 +36,7 @@ def s17imm64 : Operand<i64> {
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS17ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def tocentry : Operand<iPTR> {
let MIOperandInfo = (ops i64imm:$imm);
@@ -148,6 +151,9 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
def BL8_NOTOC : IForm<18, 0, 1, (outs),
(ins calltarget:$func),
"bl $func", IIC_BrB, []>;
+ def BL8_NOTOC_TLS : IForm<18, 0, 1, (outs),
+ (ins tlscall:$func),
+ "bl $func", IIC_BrB, []>;
}
}
let Uses = [CTR8, RM] in {
@@ -431,9 +437,14 @@ def PROBED_ALLOCA_64 : PPCCustomInserterPseudo<(outs g8rc:$result),
(ins g8rc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_64",
[(set i64:$result,
(PPCprobedalloca i64:$negsize, iaddr:$fpsi))]>;
-def PREPARE_PROBED_ALLOCA_64 : PPCEmitTimePseudo<(outs g8rc:$fp,
- g8rc:$sp),
+def PREPARE_PROBED_ALLOCA_64 : PPCEmitTimePseudo<(outs
+ g8rc:$fp, g8rc:$actual_negsize),
(ins g8rc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_64", []>;
+def PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64 : PPCEmitTimePseudo<(outs
+ g8rc:$fp, g8rc:$actual_negsize),
+ (ins g8rc:$negsize, memri:$fpsi),
+ "#PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64", []>,
+ RegConstraint<"$actual_negsize = $negsize">;
def PROBED_STACKALLOC_64 : PPCEmitTimePseudo<(outs g8rc:$scratch, g8rc:$temp),
(ins i64imm:$stacksize),
"#PROBED_STACKALLOC_64", []>;
@@ -835,7 +846,7 @@ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def SETB8 : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
"setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
}
-def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins i32imm:$L),
+def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins u2imm:$L),
"darn $RT, $L", IIC_LdStLD>, isPPC64;
def ADDPCIS : DXForm<19, 2, (outs g8rc:$RT), (ins i32imm:$D),
"addpcis $RT, $D", IIC_BrB, []>, isPPC64;
@@ -976,8 +987,11 @@ def : InstAlias<"cntlzw. $rA, $rS", (CNTLZW8_rec g8rc:$rA, g8rc:$rS)>;
def : InstAlias<"mtxer $Rx", (MTSPR8 1, g8rc:$Rx)>;
def : InstAlias<"mfxer $Rx", (MFSPR8 g8rc:$Rx, 1)>;
-def : InstAlias<"mtudscr $Rx", (MTSPR8 3, g8rc:$Rx)>;
-def : InstAlias<"mfudscr $Rx", (MFSPR8 g8rc:$Rx, 3)>;
+//Disable this alias on AIX for now because as does not support them.
+let Predicates = [ModernAs] in {
+ def : InstAlias<"mtudscr $Rx", (MTSPR8 3, g8rc:$Rx)>;
+ def : InstAlias<"mfudscr $Rx", (MFSPR8 g8rc:$Rx, 3)>;
+}
def : InstAlias<"mfrtcu $Rx", (MFSPR8 g8rc:$Rx, 4)>;
def : InstAlias<"mfrtcl $Rx", (MFSPR8 g8rc:$Rx, 5)>;
@@ -1021,8 +1035,8 @@ def : InstAlias<"mfamr $Rx", (MFSPR8 g8rc:$Rx, 29)>;
foreach SPRG = 0-3 in {
def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR8 g8rc:$RT, !add(SPRG, 272))>;
def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR8 g8rc:$RT, !add(SPRG, 272))>;
- def : InstAlias<"mfsprg "#SPRG#", $RT", (MTSPR8 !add(SPRG, 272), g8rc:$RT)>;
- def : InstAlias<"mfsprg"#SPRG#" $RT", (MTSPR8 !add(SPRG, 272), g8rc:$RT)>;
+ def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR8 !add(SPRG, 272), g8rc:$RT)>;
+ def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR8 !add(SPRG, 272), g8rc:$RT)>;
}
def : InstAlias<"mfasr $RT", (MFSPR8 g8rc:$RT, 280)>;
@@ -1051,7 +1065,7 @@ def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
"lwa $rD, $src", IIC_LdStLWA,
[(set i64:$rD,
- (aligned4sextloadi32 iaddrX4:$src))]>, isPPC64,
+ (DSFormSextLoadi32 iaddrX4:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src),
@@ -1162,7 +1176,7 @@ def LWZUX8 : XForm_1_memOp<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
let PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
"ld $rD, $src", IIC_LdStLD,
- [(set i64:$rD, (aligned4load iaddrX4:$src))]>, isPPC64;
+ [(set i64:$rD, (DSFormLoad iaddrX4:$src))]>, isPPC64;
// The following four definitions are selected for small code model only.
// Otherwise, we need to create two instructions to form a 32-bit offset,
// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
@@ -1257,17 +1271,36 @@ def ADDItlsgdL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm6
[(set i64:$rD,
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
+
+class GETtlsADDRPseudo <string asmstr> : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+ asmstr,
+ [(set i64:$rD,
+ (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+class GETtlsldADDRPseudo <string asmstr> : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+ asmstr,
+ [(set i64:$rD,
+ (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1 in {
+// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
// explicitly defined when this op is created, so not mentioned here.
// This is lowered to BL8_NOP_TLS by the assembly printer, so the size must be
// correct because the branch select pass is relying on it.
-let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Size = 8,
- Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
- "#GETtlsADDR",
- [(set i64:$rD,
- (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
- isPPC64;
+let Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7], Size = 8 in
+def GETtlsADDR : GETtlsADDRPseudo <"#GETtlsADDR">;
+let Defs = [X0,X2,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7], Size = 8 in
+def GETtlsADDRPCREL : GETtlsADDRPseudo <"#GETtlsADDRPCREL">;
+
+// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
+// explicitly defined when this op is created, so not mentioned here.
+let Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsldADDR : GETtlsldADDRPseudo <"#GETtlsldADDR">;
+let Defs = [X0,X2,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsldADDRPCREL : GETtlsldADDRPseudo <"#GETtlsldADDRPCREL">;
+}
+
// Combined op for ADDItlsgdL and GETtlsADDR, late expanded. X3 and LR8
// are true defines while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
@@ -1291,15 +1324,6 @@ def ADDItlsldL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm6
[(set i64:$rD,
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
-// explicitly defined when this op is created, so not mentioned here.
-let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
- Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsldADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
- "#GETtlsldADDR",
- [(set i64:$rD,
- (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
- isPPC64;
// Combined op for ADDItlsldL and GETtlsADDR, late expanded. X3 and LR8
// are true defines, while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
@@ -1324,6 +1348,11 @@ def ADDIdtprelL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm
[(set i64:$rD,
(PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
+def PADDIdtprel : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+ "#PADDIdtprel",
+ [(set i64:$rD,
+ (PPCpaddiDtprel i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
let PPC970_Unit = 2 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
@@ -1354,7 +1383,7 @@ def STWX8 : XForm_8_memOp<31, 151, (outs), (ins g8rc:$rS, memrr:$dst),
// Normal 8-byte stores.
def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
"std $rS, $dst", IIC_LdStSTD,
- [(aligned4store i64:$rS, iaddrX4:$dst)]>, isPPC64;
+ [(DSFormStore i64:$rS, iaddrX4:$dst)]>, isPPC64;
def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
"stdx $rS, $dst", IIC_LdStSTD,
[(store i64:$rS, xaddrX4:$dst)]>, isPPC64,
@@ -1421,7 +1450,7 @@ def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
(STHU8 $rS, iaddroff:$ptroff, $ptrreg)>;
def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
(STWU8 $rS, iaddroff:$ptroff, $ptrreg)>;
-def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+def : Pat<(DSFormPreStore i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
(STDU $rS, iaddroff:$ptroff, $ptrreg)>;
def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
@@ -1439,11 +1468,11 @@ def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
//
-let PPC970_Unit = 3, hasSideEffects = 0,
+let PPC970_Unit = 3, hasSideEffects = 0, mayRaiseFPException = 1,
Uses = [RM] in { // FPU Operations.
defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB),
"fcfid", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCany_fcfid f64:$frB))]>, isPPC64;
defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB),
"fctid", "$frD, $frB", IIC_FPGeneral,
[]>, isPPC64;
@@ -1452,23 +1481,23 @@ defm FCTIDU : XForm_26r<63, 942, (outs f8rc:$frD), (ins f8rc:$frB),
[]>, isPPC64;
defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB),
"fctidz", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCany_fctidz f64:$frB))]>, isPPC64;
defm FCFIDU : XForm_26r<63, 974, (outs f8rc:$frD), (ins f8rc:$frB),
"fcfidu", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCany_fcfidu f64:$frB))]>, isPPC64;
defm FCFIDS : XForm_26r<59, 846, (outs f4rc:$frD), (ins f8rc:$frB),
"fcfids", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64;
+ [(set f32:$frD, (PPCany_fcfids f64:$frB))]>, isPPC64;
defm FCFIDUS : XForm_26r<59, 974, (outs f4rc:$frD), (ins f8rc:$frB),
"fcfidus", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64;
+ [(set f32:$frD, (PPCany_fcfidus f64:$frB))]>, isPPC64;
defm FCTIDUZ : XForm_26r<63, 943, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiduz", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCany_fctiduz f64:$frB))]>, isPPC64;
defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiwuz", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCany_fctiwuz f64:$frB))]>, isPPC64;
}
@@ -1565,11 +1594,11 @@ def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)),
// Patterns to match r+r indexed loads and stores for
// addresses without at least 4-byte alignment.
-def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)),
+def : Pat<(i64 (NonDSFormSextLoadi32 xoaddr:$src)),
(LWAX xoaddr:$src)>;
-def : Pat<(i64 (unaligned4load xoaddr:$src)),
+def : Pat<(i64 (NonDSFormLoad xoaddr:$src)),
(LDX xoaddr:$src)>;
-def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
+def : Pat<(NonDSFormStore i64:$rS, xoaddr:$dst),
(STDX $rS, xoaddr:$dst)>;
// 64-bits atomic loads and stores
@@ -1580,6 +1609,11 @@ def : Pat<(atomic_store_64 iaddrX4:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr
def : Pat<(atomic_store_64 xaddrX4:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
let Predicates = [IsISA3_0] in {
+// DARN (deliver random number)
+// L=0 for 32-bit, L=1 for conditioned random, L=2 for raw random
+def : Pat<(int_ppc_darn32), (EXTRACT_SUBREG (DARN 0), sub_32)>;
+def : Pat<(int_ppc_darn), (DARN 1)>;
+def : Pat<(int_ppc_darnraw), (DARN 2)>;
class X_L1_RA5_RB5<bits<6> opcode, bits<10> xo, string opc, RegisterOperand ty,
InstrItinClass itin, list<dag> pattern>
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 920eeed9d41f..1a34aa09315b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -404,12 +404,14 @@ let isCodeGenOnly = 1 in {
Deprecated<DeprecatedDST>;
}
-def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
- "mfvscr $vD", IIC_LdStStore,
- [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
-def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
- "mtvscr $vB", IIC_LdStLoad,
- [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
+let hasSideEffects = 1 in {
+ def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
+ "mfvscr $vD", IIC_LdStStore,
+ [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
+ def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
+ "mtvscr $vB", IIC_LdStLoad,
+ [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
+}
let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads.
def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src),
@@ -469,10 +471,11 @@ def VNMSUBFP: VAForm_1<47, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB),
"vnmsubfp $vD, $vA, $vC, $vB", IIC_VecFP,
[(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC,
(fneg v4f32:$vB))))]>;
-
-def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
-def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
- v8i16>;
+let hasSideEffects = 1 in {
+ def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
+ def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
+ v8i16>;
+}
def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>;
} // isCommutable
@@ -608,14 +611,16 @@ def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm,
v4i32, v16i8, v4i32>;
def VMSUMSHM : VA1a_Int_Ty3<40, "vmsumshm", int_ppc_altivec_vmsumshm,
v4i32, v8i16, v4i32>;
-def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs,
- v4i32, v8i16, v4i32>;
def VMSUMUBM : VA1a_Int_Ty3<36, "vmsumubm", int_ppc_altivec_vmsumubm,
v4i32, v16i8, v4i32>;
def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm,
v4i32, v8i16, v4i32>;
-def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs,
- v4i32, v8i16, v4i32>;
+let hasSideEffects = 1 in {
+ def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs,
+ v4i32, v8i16, v4i32>;
+ def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs,
+ v4i32, v8i16, v4i32>;
+}
let isCommutable = 1 in {
def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb,
@@ -665,15 +670,17 @@ def VSUBUBS : VX1_Int_Ty<1536, "vsububs" , int_ppc_altivec_vsububs, v16i8>;
def VSUBUHS : VX1_Int_Ty<1600, "vsubuhs" , int_ppc_altivec_vsubuhs, v8i16>;
def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>;
-def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>;
-def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>;
+let hasSideEffects = 1 in {
+ def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>;
+ def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>;
-def VSUM4SBS: VX1_Int_Ty3<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs,
- v4i32, v16i8, v4i32>;
-def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs,
- v4i32, v8i16, v4i32>;
-def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
- v4i32, v16i8, v4i32>;
+ def VSUM4SBS: VX1_Int_Ty3<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs,
+ v4i32, v16i8, v4i32>;
+ def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs,
+ v4i32, v8i16, v4i32>;
+ def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
+ v4i32, v16i8, v4i32>;
+}
def VNOR : VXForm_1<1284, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vnor $vD, $vA, $vB", IIC_VecFP,
@@ -742,26 +749,28 @@ def VSPLTISW : VXForm_3<908, (outs vrrc:$vD), (ins s5imm:$SIMM),
// Vector Pack.
def VPKPX : VX1_Int_Ty2<782, "vpkpx", int_ppc_altivec_vpkpx,
v8i16, v4i32>;
-def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss,
- v16i8, v8i16>;
-def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus,
- v16i8, v8i16>;
-def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
- v8i16, v4i32>;
-def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
- v8i16, v4i32>;
+let hasSideEffects = 1 in {
+ def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss,
+ v16i8, v8i16>;
+ def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus,
+ v16i8, v8i16>;
+ def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
+ v8i16, v4i32>;
+ def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
+ v8i16, v4i32>;
+ def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
+ v16i8, v8i16>;
+ def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus,
+ v8i16, v4i32>;
+}
def VPKUHUM : VXForm_1<14, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vpkuhum $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD,
(vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
- v16i8, v8i16>;
def VPKUWUM : VXForm_1<78, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vpkuwum $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD,
(vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus,
- v8i16, v4i32>;
// Vector Unpack.
def VUPKHPX : VX2_Int_Ty2<846, "vupkhpx", int_ppc_altivec_vupkhpx,
@@ -784,47 +793,47 @@ class VCMP<bits<10> xo, string asmstr, ValueType Ty>
: VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
IIC_VecFPCompare,
[(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>;
-class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
+class VCMP_rec<bits<10> xo, string asmstr, ValueType Ty>
: VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
IIC_VecFPCompare,
- [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> {
+ [(set Ty:$vD, (Ty (PPCvcmp_rec Ty:$vA, Ty:$vB, xo)))]> {
let Defs = [CR6];
let RC = 1;
}
// f32 element comparisons.0
def VCMPBFP : VCMP <966, "vcmpbfp $vD, $vA, $vB" , v4f32>;
-def VCMPBFP_rec : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
+def VCMPBFP_rec : VCMP_rec<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
def VCMPEQFP : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>;
-def VCMPEQFP_rec : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
+def VCMPEQFP_rec : VCMP_rec<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
def VCMPGEFP : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>;
-def VCMPGEFP_rec : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
+def VCMPGEFP_rec : VCMP_rec<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
def VCMPGTFP : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>;
-def VCMPGTFP_rec : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
+def VCMPGTFP_rec : VCMP_rec<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
// i8 element comparisons.
def VCMPEQUB : VCMP < 6, "vcmpequb $vD, $vA, $vB" , v16i8>;
-def VCMPEQUB_rec : VCMPo< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
+def VCMPEQUB_rec : VCMP_rec< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
def VCMPGTSB : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>;
-def VCMPGTSB_rec : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
+def VCMPGTSB_rec : VCMP_rec<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
def VCMPGTUB : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>;
-def VCMPGTUB_rec : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
+def VCMPGTUB_rec : VCMP_rec<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
// i16 element comparisons.
def VCMPEQUH : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>;
-def VCMPEQUH_rec : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
+def VCMPEQUH_rec : VCMP_rec< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
def VCMPGTSH : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>;
-def VCMPGTSH_rec : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
+def VCMPGTSH_rec : VCMP_rec<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
def VCMPGTUH : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>;
-def VCMPGTUH_rec : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
+def VCMPGTUH_rec : VCMP_rec<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
// i32 element comparisons.
def VCMPEQUW : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>;
-def VCMPEQUW_rec : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
+def VCMPEQUW_rec : VCMP_rec<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
-def VCMPGTSW_rec : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
+def VCMPGTSW_rec : VCMP_rec<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
-def VCMPGTUW_rec : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+def VCMPGTUW_rec : VCMP_rec<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
isReMaterializable = 1 in {
@@ -933,6 +942,18 @@ def : Pat<(v1i128 (bitconvert (v4i32 VRRC:$src))), (v1i128 VRRC:$src)>;
def : Pat<(v1i128 (bitconvert (v4f32 VRRC:$src))), (v1i128 VRRC:$src)>;
def : Pat<(v1i128 (bitconvert (v2i64 VRRC:$src))), (v1i128 VRRC:$src)>;
+def : Pat<(f128 (bitconvert (v16i8 VRRC:$src))), (f128 VRRC:$src)>;
+def : Pat<(f128 (bitconvert (v8i16 VRRC:$src))), (f128 VRRC:$src)>;
+def : Pat<(f128 (bitconvert (v4i32 VRRC:$src))), (f128 VRRC:$src)>;
+def : Pat<(f128 (bitconvert (v4f32 VRRC:$src))), (f128 VRRC:$src)>;
+def : Pat<(f128 (bitconvert (v2f64 VRRC:$src))), (f128 VRRC:$src)>;
+
+def : Pat<(v16i8 (bitconvert (f128 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (f128 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (f128 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (f128 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v2f64 (bitconvert (f128 VRRC:$src))), (v2f64 VRRC:$src)>;
+
// Max/Min
def : Pat<(v16i8 (umax v16i8:$src1, v16i8:$src2)),
(v16i8 (VMAXUB $src1, $src2))>;
@@ -1291,11 +1312,11 @@ def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
// i64 element comparisons.
def VCMPEQUD : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>;
-def VCMPEQUD_rec : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
+def VCMPEQUD_rec : VCMP_rec<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
def VCMPGTSD : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>;
-def VCMPGTSD_rec : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
+def VCMPGTSD_rec : VCMP_rec<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>;
-def VCMPGTUD_rec : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
+def VCMPGTUD_rec : VCMP_rec<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
// The cryptography instructions that do not require Category:Vector.Crypto
def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb",
@@ -1306,20 +1327,22 @@ def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw",
int_ppc_altivec_crypto_vpmsumw, v4i32>;
def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd",
int_ppc_altivec_crypto_vpmsumd, v2i64>;
-def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
- int_ppc_altivec_crypto_vpermxor, v16i8>;
+def VPERMXOR : VAForm_1<45, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, vrrc:$VC),
+ "vpermxor $VD, $VA, $VB, $VC", IIC_VecFP, []>;
// Vector doubleword integer pack and unpack.
-def VPKSDSS : VX1_Int_Ty2<1486, "vpksdss", int_ppc_altivec_vpksdss,
- v4i32, v2i64>;
-def VPKSDUS : VX1_Int_Ty2<1358, "vpksdus", int_ppc_altivec_vpksdus,
- v4i32, v2i64>;
+let hasSideEffects = 1 in {
+ def VPKSDSS : VX1_Int_Ty2<1486, "vpksdss", int_ppc_altivec_vpksdss,
+ v4i32, v2i64>;
+ def VPKSDUS : VX1_Int_Ty2<1358, "vpksdus", int_ppc_altivec_vpksdus,
+ v4i32, v2i64>;
+ def VPKUDUS : VX1_Int_Ty2<1230, "vpkudus", int_ppc_altivec_vpkudus,
+ v4i32, v2i64>;
+}
def VPKUDUM : VXForm_1<1102, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vpkudum $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD,
(vpkudum_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VPKUDUS : VX1_Int_Ty2<1230, "vpkudus", int_ppc_altivec_vpkudus,
- v4i32, v2i64>;
def VUPKHSW : VX2_Int_Ty2<1614, "vupkhsw", int_ppc_altivec_vupkhsw,
v2i64, v4i32>;
def VUPKLSW : VX2_Int_Ty2<1742, "vupklsw", int_ppc_altivec_vupklsw,
@@ -1363,21 +1386,21 @@ def VMSUMUDM : VA1a_Int_Ty3<35, "vmsumudm", int_ppc_altivec_vmsumudm,
// i8 element comparisons.
def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>;
-def VCMPNEB_rec : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
+def VCMPNEB_rec : VCMP_rec < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
def VCMPNEZB : VCMP <263, "vcmpnezb $vD, $vA, $vB" , v16i8>;
-def VCMPNEZB_rec : VCMPo<263, "vcmpnezb. $vD, $vA, $vB", v16i8>;
+def VCMPNEZB_rec : VCMP_rec<263, "vcmpnezb. $vD, $vA, $vB", v16i8>;
// i16 element comparisons.
def VCMPNEH : VCMP < 71, "vcmpneh $vD, $vA, $vB" , v8i16>;
-def VCMPNEH_rec : VCMPo< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>;
+def VCMPNEH_rec : VCMP_rec< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>;
def VCMPNEZH : VCMP <327, "vcmpnezh $vD, $vA, $vB" , v8i16>;
-def VCMPNEZH_rec : VCMPo<327, "vcmpnezh. $vD, $vA, $vB", v8i16>;
+def VCMPNEZH_rec : VCMP_rec<327, "vcmpnezh. $vD, $vA, $vB", v8i16>;
// i32 element comparisons.
def VCMPNEW : VCMP <135, "vcmpnew $vD, $vA, $vB" , v4i32>;
-def VCMPNEW_rec : VCMPo<135, "vcmpnew. $vD, $vA, $vB" , v4i32>;
+def VCMPNEW_rec : VCMP_rec<135, "vcmpnew. $vD, $vA, $vB" , v4i32>;
def VCMPNEZW : VCMP <391, "vcmpnezw $vD, $vA, $vB" , v4i32>;
-def VCMPNEZW_rec : VCMPo<391, "vcmpnezw. $vD, $vA, $vB", v4i32>;
+def VCMPNEZW_rec : VCMP_rec<391, "vcmpnezw. $vD, $vA, $vB", v4i32>;
// VX-Form: [PO VRT / UIM VRB XO].
// We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent
@@ -1449,11 +1472,16 @@ def VCTZD : VX_VT5_EO5_VB5<1538, 31, "vctzd",
[(set v2i64:$vD, (cttz v2i64:$vB))]>;
// Vector Extend Sign
-def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w", []>;
-def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w", []>;
-def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d", []>;
-def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d", []>;
-def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d", []>;
+def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w",
+ [(set v4i32:$vD, (int_ppc_altivec_vextsb2w v16i8:$vB))]>;
+def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w",
+ [(set v4i32:$vD, (int_ppc_altivec_vextsh2w v8i16:$vB))]>;
+def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d",
+ [(set v2i64:$vD, (int_ppc_altivec_vextsb2d v16i8:$vB))]>;
+def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d",
+ [(set v2i64:$vD, (int_ppc_altivec_vextsh2d v8i16:$vB))]>;
+def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d",
+ [(set v2i64:$vD, (int_ppc_altivec_vextsw2d v4i32:$vB))]>;
let isCodeGenOnly = 1 in {
def VEXTSB2Ws : VX_VT5_EO5_VB5s<1538, 16, "vextsb2w", []>;
def VEXTSH2Ws : VX_VT5_EO5_VB5s<1538, 17, "vextsh2w", []>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 632d4d9deb8a..646efe64a22c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -637,12 +637,12 @@ class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
}
class XForm_17a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin>
+ InstrItinClass itin, list<dag> pattern>
: XForm_17<opcode, xo, OOL, IOL, asmstr, itin > {
let FRA = 0;
+ let Pattern = pattern;
}
-// Used for QPX
class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
@@ -1781,14 +1781,6 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
-// Used for QPX
-class AForm_4a<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
- let FRA = 0;
- let FRC = 0;
-}
-
// 1.7.13 M-Form
class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -2099,49 +2091,6 @@ class VX_RD5_RSp5_PS1_XO9<bits<9> xo, dag OOL, dag IOL, string asmstr,
let Inst{23-31} = xo;
}
-// Z23-Form (used by QPX)
-class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : I<opcode, OOL, IOL, asmstr, itin> {
- bits<5> FRT;
- bits<5> FRA;
- bits<5> FRB;
- bits<2> idx;
-
- let Pattern = pattern;
-
- bit RC = 0; // set by isRecordForm
-
- let Inst{6-10} = FRT;
- let Inst{11-15} = FRA;
- let Inst{16-20} = FRB;
- let Inst{21-22} = idx;
- let Inst{23-30} = xo;
- let Inst{31} = RC;
-}
-
-class Z23Form_2<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : Z23Form_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
- let FRB = 0;
-}
-
-class Z23Form_3<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : I<opcode, OOL, IOL, asmstr, itin> {
- bits<5> FRT;
- bits<12> idx;
-
- let Pattern = pattern;
-
- bit RC = 0; // set by isRecordForm
-
- let Inst{6-10} = FRT;
- let Inst{11-22} = idx;
- let Inst{23-30} = xo;
- let Inst{31} = RC;
-}
-
class Z23Form_8<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/llvm/lib/Target/PowerPC/PPCInstrHTM.td
index 992ad8216f3b..e59a08774dc5 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -164,9 +164,8 @@ def : Pat<(int_ppc_tsuspend),
(TSR 0)>;
def : Pat<(i64 (int_ppc_ttest)),
- (RLDICL (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (TABORTWCI 0, (LI 0), 0), sub_32)),
- 36, 28)>;
+ (i64 (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), (TABORTWCI 0, (LI 0), 0), sub_32))>;
} // [HasHTM]
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 11c97210ead9..9e3c6c569bd7 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -21,12 +21,15 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
@@ -73,6 +76,14 @@ static cl::opt<bool>
UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
cl::desc("Use the old (incorrect) instruction latency calculation"));
+static cl::opt<float>
+ FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5),
+ cl::desc("register pressure factor for the transformations."));
+
+static cl::opt<bool> EnableFMARegPressureReduction(
+ "ppc-fma-rp-reduction", cl::Hidden, cl::init(true),
+ cl::desc("enable register pressure reduce in machine combiner pass."));
+
// Pin the vtable to this file.
void PPCInstrInfo::anchor() {}
@@ -259,14 +270,6 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
case PPC::XVMULDP:
case PPC::XVMULSP:
case PPC::XSMULSP:
- // QPX Add:
- case PPC::QVFADD:
- case PPC::QVFADDS:
- case PPC::QVFADDSs:
- // QPX Multiply:
- case PPC::QVFMUL:
- case PPC::QVFMULS:
- case PPC::QVFMULSs:
return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
Inst.getFlag(MachineInstr::MIFlag::FmNsz);
// Fixed point:
@@ -286,23 +289,23 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
#define InfoArrayIdxFMULInst 2
#define InfoArrayIdxAddOpIdx 3
#define InfoArrayIdxMULOpIdx 4
+#define InfoArrayIdxFSubInst 5
// Array keeps info for FMA instructions:
// Index 0(InfoArrayIdxFMAInst): FMA instruction;
-// Index 1(InfoArrayIdxFAddInst): ADD instruction assoaicted with FMA;
-// Index 2(InfoArrayIdxFMULInst): MUL instruction assoaicted with FMA;
+// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA;
+// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA;
// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands;
// Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands;
-// second MUL operand index is plus 1.
-static const uint16_t FMAOpIdxInfo[][5] = {
+// second MUL operand index is plus 1;
+// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA.
+static const uint16_t FMAOpIdxInfo[][6] = {
// FIXME: Add more FMA instructions like XSNMADDADP and so on.
- {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2},
- {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2},
- {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2},
- {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2},
- {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1},
- {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1},
- {PPC::QVFMADDSs, PPC::QVFADDSs, PPC::QVFMULSs, 3, 1},
- {PPC::QVFMADD, PPC::QVFADD, PPC::QVFMUL, 3, 1}};
+ {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP},
+ {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP},
+ {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP},
+ {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP},
+ {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB},
+ {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}};
// Check if an opcode is a FMA instruction. If it is, return the index in array
// FMAOpIdxInfo. Otherwise, return -1.
@@ -313,6 +316,8 @@ int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
return -1;
}
+// On PowerPC target, we have two kinds of patterns related to FMA:
+// 1: Improve ILP.
// Try to reassociate FMA chains like below:
//
// Pattern 1:
@@ -336,11 +341,35 @@ int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
//
// breaking the dependency between A and B, allowing FMA to be executed in
// parallel (or back-to-back in a pipeline) instead of depending on each other.
+//
+// 2: Reduce register pressure.
+// Try to reassociate FMA with FSUB and a constant like below:
+// C is a floatint point const.
+//
+// Pattern 1:
+// A = FSUB X, Y (Leaf)
+// D = FMA B, C, A (Root)
+// -->
+// A = FMA B, Y, -C
+// D = FMA A, X, C
+//
+// Pattern 2:
+// A = FSUB X, Y (Leaf)
+// D = FMA B, A, C (Root)
+// -->
+// A = FMA B, Y, -C
+// D = FMA A, X, C
+//
+// Before the transformation, A must be assigned with different hardware
+// register with D. After the transformation, A and D must be assigned with
+// same hardware register due to TIE attricute of FMA instructions.
+//
bool PPCInstrInfo::getFMAPatterns(
- MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ bool DoRegPressureReduce) const {
MachineBasicBlock *MBB = Root.getParent();
- const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
for (const auto &MO : Instr.explicit_operands())
@@ -349,16 +378,35 @@ bool PPCInstrInfo::getFMAPatterns(
return true;
};
- auto IsReassociable = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
- bool IsLeaf, bool IsAdd) {
- int16_t Idx = -1;
- if (!IsAdd) {
- Idx = getFMAOpIdxInfo(Instr.getOpcode());
- if (Idx < 0)
- return false;
- } else if (Instr.getOpcode() !=
- FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())]
- [InfoArrayIdxFAddInst])
+ auto IsReassociableAddOrSub = [&](const MachineInstr &Instr,
+ unsigned OpType) {
+ if (Instr.getOpcode() !=
+ FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType])
+ return false;
+
+ // Instruction can be reassociated.
+ // fast math flags may prohibit reassociation.
+ if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+ Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
+ return false;
+
+ // Instruction operands are virtual registers for reassociation.
+ if (!IsAllOpsVirtualReg(Instr))
+ return false;
+
+ // For register pressure reassociation, the FSub must have only one use as
+ // we want to delete the sub to save its def.
+ if (OpType == InfoArrayIdxFSubInst &&
+ !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg()))
+ return false;
+
+ return true;
+ };
+
+ auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
+ int16_t &MulOpIdx, bool IsLeaf) {
+ int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode());
+ if (Idx < 0)
return false;
// Instruction can be reassociated.
@@ -371,65 +419,356 @@ bool PPCInstrInfo::getFMAPatterns(
if (!IsAllOpsVirtualReg(Instr))
return false;
- if (IsAdd && IsLeaf)
+ MulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
+ if (IsLeaf)
return true;
AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
- MachineInstr *MIAdd = MRI.getUniqueVRegDef(OpAdd.getReg());
+ MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg());
// If 'add' operand's def is not in current block, don't do ILP related opt.
if (!MIAdd || MIAdd->getParent() != MBB)
return false;
// If this is not Leaf FMA Instr, its 'add' operand should only have one use
// as this fma will be changed later.
- return IsLeaf ? true : MRI.hasOneNonDBGUse(OpAdd.getReg());
+ return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg());
};
int16_t AddOpIdx = -1;
+ int16_t MulOpIdx = -1;
+
+ bool IsUsedOnceL = false;
+ bool IsUsedOnceR = false;
+ MachineInstr *MULInstrL = nullptr;
+ MachineInstr *MULInstrR = nullptr;
+
+ auto IsRPReductionCandidate = [&]() {
+ // Currently, we only support float and double.
+ // FIXME: add support for other types.
+ unsigned Opcode = Root.getOpcode();
+ if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP)
+ return false;
+
+ // Root must be a valid FMA like instruction.
+ // Treat it as leaf as we don't care its add operand.
+ if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) {
+ assert((MulOpIdx >= 0) && "mul operand index not right!");
+ Register MULRegL = TRI->lookThruSingleUseCopyChain(
+ Root.getOperand(MulOpIdx).getReg(), MRI);
+ Register MULRegR = TRI->lookThruSingleUseCopyChain(
+ Root.getOperand(MulOpIdx + 1).getReg(), MRI);
+ if (!MULRegL && !MULRegR)
+ return false;
+
+ if (MULRegL && !MULRegR) {
+ MULRegR =
+ TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI);
+ IsUsedOnceL = true;
+ } else if (!MULRegL && MULRegR) {
+ MULRegL =
+ TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI);
+ IsUsedOnceR = true;
+ } else {
+ IsUsedOnceL = true;
+ IsUsedOnceR = true;
+ }
+
+ if (!Register::isVirtualRegister(MULRegL) ||
+ !Register::isVirtualRegister(MULRegR))
+ return false;
+
+ MULInstrL = MRI->getVRegDef(MULRegL);
+ MULInstrR = MRI->getVRegDef(MULRegR);
+ return true;
+ }
+ return false;
+ };
+
+ // Register pressure fma reassociation patterns.
+ if (DoRegPressureReduce && IsRPReductionCandidate()) {
+ assert((MULInstrL && MULInstrR) && "wrong register preduction candidate!");
+ // Register pressure pattern 1
+ if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR &&
+ IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) {
+ LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BCA);
+ return true;
+ }
+
+ // Register pressure pattern 2
+ if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL &&
+ IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) {
+ LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BAC);
+ return true;
+ }
+ }
+
+ // ILP fma reassociation patterns.
// Root must be a valid FMA like instruction.
- if (!IsReassociable(Root, AddOpIdx, false, false))
+ AddOpIdx = -1;
+ if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false))
return false;
assert((AddOpIdx >= 0) && "add operand index not right!");
Register RegB = Root.getOperand(AddOpIdx).getReg();
- MachineInstr *Prev = MRI.getUniqueVRegDef(RegB);
+ MachineInstr *Prev = MRI->getUniqueVRegDef(RegB);
// Prev must be a valid FMA like instruction.
AddOpIdx = -1;
- if (!IsReassociable(*Prev, AddOpIdx, false, false))
+ if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false))
return false;
assert((AddOpIdx >= 0) && "add operand index not right!");
Register RegA = Prev->getOperand(AddOpIdx).getReg();
- MachineInstr *Leaf = MRI.getUniqueVRegDef(RegA);
+ MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA);
AddOpIdx = -1;
- if (IsReassociable(*Leaf, AddOpIdx, true, false)) {
+ if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) {
Patterns.push_back(MachineCombinerPattern::REASSOC_XMM_AMM_BMM);
+ LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");
return true;
}
- if (IsReassociable(*Leaf, AddOpIdx, true, true)) {
+ if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) {
Patterns.push_back(MachineCombinerPattern::REASSOC_XY_AMM_BMM);
+ LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");
return true;
}
return false;
}
+void PPCInstrInfo::finalizeInsInstrs(
+ MachineInstr &Root, MachineCombinerPattern &P,
+ SmallVectorImpl<MachineInstr *> &InsInstrs) const {
+ assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");
+
+ MachineFunction *MF = Root.getMF();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineConstantPool *MCP = MF->getConstantPool();
+
+ int16_t Idx = getFMAOpIdxInfo(Root.getOpcode());
+ if (Idx < 0)
+ return;
+
+ uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
+
+ // For now we only need to fix up placeholder for register pressure reduce
+ // patterns.
+ Register ConstReg = 0;
+ switch (P) {
+ case MachineCombinerPattern::REASSOC_XY_BCA:
+ ConstReg =
+ TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI);
+ break;
+ case MachineCombinerPattern::REASSOC_XY_BAC:
+ ConstReg =
+ TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI);
+ break;
+ default:
+ // Not register pressure reduce patterns.
+ return;
+ }
+
+ MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg);
+ // Get const value from const pool.
+ const Constant *C = getConstantFromConstantPool(ConstDefInstr);
+ assert(isa<llvm::ConstantFP>(C) && "not a valid constant!");
+
+ // Get negative fp const.
+ APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF());
+ F1.changeSign();
+ Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1);
+ Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType());
+
+ // Put negative fp const into constant pool.
+ unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment);
+
+ MachineOperand *Placeholder = nullptr;
+ // Record the placeholder PPC::ZERO8 we add in reassociateFMA.
+ for (auto *Inst : InsInstrs) {
+ for (MachineOperand &Operand : Inst->explicit_operands()) {
+ assert(Operand.isReg() && "Invalid instruction in InsInstrs!");
+ if (Operand.getReg() == PPC::ZERO8) {
+ Placeholder = &Operand;
+ break;
+ }
+ }
+ }
+
+ assert(Placeholder && "Placeholder does not exist!");
+
+ // Generate instructions to load the const fp from constant pool.
+ // We only support PPC64 and medium code model.
+ Register LoadNewConst =
+ generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs);
+
+ // Fill the placeholder with the new load from constant pool.
+ Placeholder->setReg(LoadNewConst);
+}
+
+bool PPCInstrInfo::shouldReduceRegisterPressure(
+ MachineBasicBlock *MBB, RegisterClassInfo *RegClassInfo) const {
+
+ if (!EnableFMARegPressureReduction)
+ return false;
+
+ // Currently, we only enable register pressure reducing in machine combiner
+ // for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector
+ // support.
+ //
+ // So we need following instructions to access a TOC entry:
+ //
+ // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0
+ // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0,
+ // killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool)
+ //
+ // FIXME: add more supported targets, like Small and Large code model, PPC32,
+ // AIX.
+ if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
+ Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium))
+ return false;
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+
+ auto GetMBBPressure = [&](MachineBasicBlock *MBB) -> std::vector<unsigned> {
+ RegionPressure Pressure;
+ RegPressureTracker RPTracker(Pressure);
+
+ // Initialize the register pressure tracker.
+ RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(),
+ /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
+
+ for (MachineBasicBlock::iterator MII = MBB->instr_end(),
+ MIE = MBB->instr_begin();
+ MII != MIE; --MII) {
+ MachineInstr &MI = *std::prev(MII);
+ if (MI.isDebugValue() || MI.isDebugLabel())
+ continue;
+ RegisterOperands RegOpers;
+ RegOpers.collect(MI, *TRI, *MRI, false, false);
+ RPTracker.recedeSkipDebugValues();
+ assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
+ RPTracker.recede(RegOpers);
+ }
+
+ // Close the RPTracker to finalize live ins.
+ RPTracker.closeRegion();
+
+ return RPTracker.getPressure().MaxSetPressure;
+ };
+
+ // For now we only care about float and double type fma.
+ unsigned VSSRCLimit = TRI->getRegPressureSetLimit(
+ *MBB->getParent(), PPC::RegisterPressureSets::VSSRC);
+
+ // Only reduce register pressure when pressure is high.
+ return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >
+ (float)VSSRCLimit * FMARPFactor;
+}
+
+bool PPCInstrInfo::isLoadFromConstantPool(MachineInstr *I) const {
+ // I has only one memory operand which is load from constant pool.
+ if (!I->hasOneMemOperand())
+ return false;
+
+ MachineMemOperand *Op = I->memoperands()[0];
+ return Op->isLoad() && Op->getPseudoValue() &&
+ Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool;
+}
+
+Register PPCInstrInfo::generateLoadForNewConst(
+ unsigned Idx, MachineInstr *MI, Type *Ty,
+ SmallVectorImpl<MachineInstr *> &InsInstrs) const {
+ // Now we only support PPC64, Medium code model and P9 with vector.
+ // We have immutable pattern to access const pool. See function
+ // shouldReduceRegisterPressure.
+ assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
+ Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium) &&
+ "Target not supported!\n");
+
+ MachineFunction *MF = MI->getMF();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+
+ // Generate ADDIStocHA8
+ Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ MachineInstrBuilder TOCOffset =
+ BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1)
+ .addReg(PPC::X2)
+ .addConstantPoolIndex(Idx);
+
+ assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&
+ "Only float and double are supported!");
+
+ unsigned LoadOpcode;
+ // Should be float type or double type.
+ if (Ty->isFloatTy())
+ LoadOpcode = PPC::DFLOADf32;
+ else
+ LoadOpcode = PPC::DFLOADf64;
+
+ const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
+ Register VReg2 = MRI->createVirtualRegister(RC);
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(*MF), MachineMemOperand::MOLoad,
+ Ty->getScalarSizeInBits() / 8, MF->getDataLayout().getPrefTypeAlign(Ty));
+
+ // Generate Load from constant pool.
+ MachineInstrBuilder Load =
+ BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2)
+ .addConstantPoolIndex(Idx)
+ .addReg(VReg1, getKillRegState(true))
+ .addMemOperand(MMO);
+
+ Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO);
+
+ // Insert the toc load instructions into InsInstrs.
+ InsInstrs.insert(InsInstrs.begin(), Load);
+ InsInstrs.insert(InsInstrs.begin(), TOCOffset);
+ return VReg2;
+}
+
+// This function returns the const value in constant pool if the \p I is a load
+// from constant pool.
+const Constant *
+PPCInstrInfo::getConstantFromConstantPool(MachineInstr *I) const {
+ MachineFunction *MF = I->getMF();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ MachineConstantPool *MCP = MF->getConstantPool();
+ assert(I->mayLoad() && "Should be a load instruction.\n");
+ for (auto MO : I->uses()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg == 0 || !Register::isVirtualRegister(Reg))
+ continue;
+ // Find the toc address.
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ for (auto MO2 : DefMI->uses())
+ if (MO2.isCPI())
+ return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal;
+ }
+ return nullptr;
+}
+
bool PPCInstrInfo::getMachineCombinerPatterns(
- MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ bool DoRegPressureReduce) const {
// Using the machine combiner in this way is potentially expensive, so
// restrict to when aggressive optimizations are desired.
if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive)
return false;
- if (getFMAPatterns(Root, Patterns))
+ if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))
return true;
- return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
+ return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
+ DoRegPressureReduce);
}
void PPCInstrInfo::genAlternativeCodeSequence(
@@ -440,6 +779,8 @@ void PPCInstrInfo::genAlternativeCodeSequence(
switch (Pattern) {
case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
+ case MachineCombinerPattern::REASSOC_XY_BCA:
+ case MachineCombinerPattern::REASSOC_XY_BAC:
reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
break;
default:
@@ -450,8 +791,6 @@ void PPCInstrInfo::genAlternativeCodeSequence(
}
}
-// Currently, only handle two patterns REASSOC_XY_AMM_BMM and
-// REASSOC_XMM_AMM_BMM. See comments for getFMAPatterns.
void PPCInstrInfo::reassociateFMA(
MachineInstr &Root, MachineCombinerPattern Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -459,6 +798,7 @@ void PPCInstrInfo::reassociateFMA(
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
MachineFunction *MF = Root.getMF();
MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
MachineOperand &OpC = Root.getOperand(0);
Register RegC = OpC.getReg();
const TargetRegisterClass *RC = MRI.getRegClass(RegC);
@@ -468,13 +808,42 @@ void PPCInstrInfo::reassociateFMA(
int16_t Idx = getFMAOpIdxInfo(FmaOp);
assert(Idx >= 0 && "Root must be a FMA instruction");
+ bool IsILPReassociate =
+ (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) ||
+ (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM);
+
uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
- MachineInstr *Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
- MachineInstr *Leaf =
- MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
- uint16_t IntersectedFlags =
- Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
+
+ MachineInstr *Prev = nullptr;
+ MachineInstr *Leaf = nullptr;
+ switch (Pattern) {
+ default:
+ llvm_unreachable("not recognized pattern!");
+ case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
+ case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
+ Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
+ break;
+ case MachineCombinerPattern::REASSOC_XY_BAC: {
+ Register MULReg =
+ TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI);
+ Leaf = MRI.getVRegDef(MULReg);
+ break;
+ }
+ case MachineCombinerPattern::REASSOC_XY_BCA: {
+ Register MULReg = TRI->lookThruCopyLike(
+ Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI);
+ Leaf = MRI.getVRegDef(MULReg);
+ break;
+ }
+ }
+
+ uint16_t IntersectedFlags = 0;
+ if (IsILPReassociate)
+ IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
+ else
+ IntersectedFlags = Root.getFlags() & Leaf->getFlags();
auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
bool &KillFlag) {
@@ -484,36 +853,51 @@ void PPCInstrInfo::reassociateFMA(
};
auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
- Register &MulOp2, bool &MulOp1KillFlag,
- bool &MulOp2KillFlag) {
+ Register &MulOp2, Register &AddOp,
+ bool &MulOp1KillFlag, bool &MulOp2KillFlag,
+ bool &AddOpKillFlag) {
GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
+ GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag);
};
- Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32;
+ Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11,
+ RegA21, RegB;
bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
- KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false;
+ KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false,
+ KillA11 = false, KillA21 = false, KillB = false;
- GetFMAInstrInfo(Root, RegM31, RegM32, KillM31, KillM32);
- GetFMAInstrInfo(*Prev, RegM21, RegM22, KillM21, KillM22);
+ GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB);
+
+ if (IsILPReassociate)
+ GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);
if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
- GetFMAInstrInfo(*Leaf, RegM11, RegM12, KillM11, KillM12);
+ GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);
GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
} else if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) {
GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
+ } else {
+ // Get FSUB instruction info.
+ GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
+ GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
}
// Create new virtual registers for the new results instead of
// recycling legacy ones because the MachineCombiner's computation of the
// critical path requires a new register definition rather than an existing
// one.
+ // For register pressure reassociation, we only need create one virtual
+ // register for the new fma.
Register NewVRA = MRI.createVirtualRegister(RC);
InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));
- Register NewVRB = MRI.createVirtualRegister(RC);
- InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
+ Register NewVRB = 0;
+ if (IsILPReassociate) {
+ NewVRB = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
+ }
Register NewVRD = 0;
if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
@@ -532,7 +916,11 @@ void PPCInstrInfo::reassociateFMA(
MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
};
- if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) {
+ MachineInstrBuilder NewARegPressure, NewCRegPressure;
+ switch (Pattern) {
+ default:
+ llvm_unreachable("not recognized pattern!");
+ case MachineCombinerPattern::REASSOC_XY_AMM_BMM: {
// Create new instructions for insertion.
MachineInstrBuilder MINewB =
BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
@@ -565,7 +953,9 @@ void PPCInstrInfo::reassociateFMA(
InsInstrs.push_back(MINewA);
InsInstrs.push_back(MINewB);
InsInstrs.push_back(MINewC);
- } else if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
+ break;
+ }
+ case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: {
assert(NewVRD && "new FMA register not created!");
// Create new instructions for insertion.
MachineInstrBuilder MINewA =
@@ -607,6 +997,47 @@ void PPCInstrInfo::reassociateFMA(
InsInstrs.push_back(MINewB);
InsInstrs.push_back(MINewD);
InsInstrs.push_back(MINewC);
+ break;
+ }
+ case MachineCombinerPattern::REASSOC_XY_BAC:
+ case MachineCombinerPattern::REASSOC_XY_BCA: {
+ Register VarReg;
+ bool KillVarReg = false;
+ if (Pattern == MachineCombinerPattern::REASSOC_XY_BCA) {
+ VarReg = RegM31;
+ KillVarReg = KillM31;
+ } else {
+ VarReg = RegM32;
+ KillVarReg = KillM32;
+ }
+ // We don't want to get negative const from memory pool too early, as the
+ // created entry will not be deleted even if it has no users. Since all
+ // operand of Leaf and Root are virtual register, we use zero register
+ // here as a placeholder. When the InsInstrs is selected in
+ // MachineCombiner, we call finalizeInsInstrs to replace the zero register
+ // with a virtual register which is a load from constant pool.
+ NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
+ .addReg(RegB, getKillRegState(RegB))
+ .addReg(RegY, getKillRegState(KillY))
+ .addReg(PPC::ZERO8);
+ NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC)
+ .addReg(NewVRA, getKillRegState(true))
+ .addReg(RegX, getKillRegState(KillX))
+ .addReg(VarReg, getKillRegState(KillVarReg));
+ // For now, we only support xsmaddadp/xsmaddasp, their add operand are
+ // both at index 1, no need to adjust.
+ // FIXME: when add more fma instructions support, like fma/fmas, adjust
+ // the operand index here.
+ break;
+ }
+ }
+
+ if (!IsILPReassociate) {
+ setSpecialOperandAttr(*NewARegPressure, IntersectedFlags);
+ setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags);
+
+ InsInstrs.push_back(NewARegPressure);
+ InsInstrs.push_back(NewCRegPressure);
}
assert(!InsInstrs.empty() &&
@@ -614,7 +1045,8 @@ void PPCInstrInfo::reassociateFMA(
// Record old instructions for deletion.
DelInstrs.push_back(Leaf);
- DelInstrs.push_back(Prev);
+ if (IsILPReassociate)
+ DelInstrs.push_back(Prev);
DelInstrs.push_back(&Root);
}
@@ -666,7 +1098,6 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case PPC::LI8:
case PPC::LIS:
case PPC::LIS8:
- case PPC::QVGPCI:
case PPC::ADDIStocHA:
case PPC::ADDIStocHA8:
case PPC::ADDItocL:
@@ -683,6 +1114,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case PPC::V_SETALLONES:
case PPC::CRSET:
case PPC::CRUNSET:
+ case PPC::XXSETACCZ:
return true;
}
return false;
@@ -1283,14 +1715,22 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addImm(31);
return;
} else if (PPC::CRRCRegClass.contains(SrcReg) &&
- PPC::G8RCRegClass.contains(DestReg)) {
- BuildMI(MBB, I, DL, get(PPC::MFOCRF8), DestReg).addReg(SrcReg);
- getKillRegState(KillSrc);
- return;
- } else if (PPC::CRRCRegClass.contains(SrcReg) &&
- PPC::GPRCRegClass.contains(DestReg)) {
- BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(SrcReg);
+ (PPC::G8RCRegClass.contains(DestReg) ||
+ PPC::GPRCRegClass.contains(DestReg))) {
+ bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
+ unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF;
+ unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM;
+ unsigned CRNum = TRI->getEncodingValue(SrcReg);
+ BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg);
getKillRegState(KillSrc);
+ if (CRNum == 7)
+ return;
+ // Shift the CR bits to make the CR field in the lowest 4 bits of GRC.
+ BuildMI(MBB, I, DL, get(ShCode), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addImm(CRNum * 4 + 4)
+ .addImm(28)
+ .addImm(31);
return;
} else if (PPC::G8RCRegClass.contains(SrcReg) &&
PPC::VSFRCRegClass.contains(DestReg)) {
@@ -1343,17 +1783,53 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
PPC::VSSRCRegClass.contains(DestReg, SrcReg))
Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
- else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
- Opc = PPC::QVFMR;
- else if (PPC::QSRCRegClass.contains(DestReg, SrcReg))
- Opc = PPC::QVFMRs;
- else if (PPC::QBRCRegClass.contains(DestReg, SrcReg))
- Opc = PPC::QVFMRb;
+ else if (Subtarget.pairedVectorMemops() &&
+ PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
+ if (SrcReg > PPC::VSRp15)
+ SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
+ else
+ SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
+ if (DestReg > PPC::VSRp15)
+ DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
+ else
+ DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
+ BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).
+ addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).
+ addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));
+ return;
+ }
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
Opc = PPC::EVOR;
- else
+ else if ((PPC::ACCRCRegClass.contains(DestReg) ||
+ PPC::UACCRCRegClass.contains(DestReg)) &&
+ (PPC::ACCRCRegClass.contains(SrcReg) ||
+ PPC::UACCRCRegClass.contains(SrcReg))) {
+ // If primed, de-prime the source register, copy the individual registers
+ // and prime the destination if needed. The vector subregisters are
+ // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the
+ // source is primed, we need to re-prime it after the copy as well.
+ PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);
+ bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg);
+ bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg);
+ MCRegister VSLSrcReg =
+ PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
+ MCRegister VSLDestReg =
+ PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
+ if (SrcPrimed)
+ BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
+ for (unsigned Idx = 0; Idx < 4; Idx++)
+ BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx)
+ .addReg(VSLSrcReg + Idx)
+ .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc));
+ if (DestPrimed)
+ BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg);
+ if (SrcPrimed && !KillSrc)
+ BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
+ return;
+ } else
llvm_unreachable("Impossible reg-to-reg copy");
const MCInstrDesc &MCID = get(Opc);
@@ -1364,7 +1840,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}
-static unsigned getSpillIndex(const TargetRegisterClass *RC) {
+unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
int OpcodeIndex = 0;
if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
@@ -1391,16 +1867,20 @@ static unsigned getSpillIndex(const TargetRegisterClass *RC) {
OpcodeIndex = SOK_VectorFloat8Spill;
} else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_VectorFloat4Spill;
- } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VRSaveSpill;
- } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadFloat8Spill;
- } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadFloat4Spill;
- } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadBitSpill;
} else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_SpillToVSR;
+ } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.pairedVectorMemops() &&
+ "Register unexpected when paired memops are disabled.");
+ OpcodeIndex = SOK_AccumulatorSpill;
+ } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.pairedVectorMemops() &&
+ "Register unexpected when paired memops are disabled.");
+ OpcodeIndex = SOK_UAccumulatorSpill;
+ } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.pairedVectorMemops() &&
+ "Register unexpected when paired memops are disabled.");
+ OpcodeIndex = SOK_PairedVecSpill;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -1437,9 +1917,6 @@ void PPCInstrInfo::StoreRegToStackSlot(
PPC::CRBITRCRegClass.hasSubClassEq(RC))
FuncInfo->setSpillsCR();
- if (PPC::VRSAVERCRegClass.hasSubClassEq(RC))
- FuncInfo->setSpillsVRSAVE();
-
if (isXFormMemOp(Opcode))
FuncInfo->setHasNonRISpills();
}
@@ -1495,9 +1972,6 @@ void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
PPC::CRBITRCRegClass.hasSubClassEq(RC))
FuncInfo->setSpillsCR();
- if (PPC::VRSAVERCRegClass.hasSubClassEq(RC))
- FuncInfo->setSpillsVRSAVE();
-
if (isXFormMemOp(Opcode))
FuncInfo->setHasNonRISpills();
}
@@ -1667,6 +2141,17 @@ bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const {
return false;
}
+bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
+ // across them, since some FP operations may change content of FPSCR.
+ // TODO: Model FPSCR in PPC instruction definitions and remove the workaround
+ if (MI.getOpcode() == PPC::MFFS || MI.getOpcode() == PPC::MTFSF)
+ return true;
+ return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
+}
+
bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const {
unsigned OpC = MI.getOpcode();
@@ -1675,6 +2160,10 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
bool isPPC64 = Subtarget.isPPC64();
MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR)
: (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
+ // Need add Def and Use for CTR implicit operand.
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg(), RegState::Implicit)
+ .addReg(Pred[1].getReg(), RegState::ImplicitDefine);
} else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MI.setDesc(get(PPC::BCLR));
MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
@@ -1694,6 +2183,10 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
bool isPPC64 = Subtarget.isPPC64();
MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
: (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
+ // Need add Def and Use for CTR implicit operand.
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg(), RegState::Implicit)
+ .addReg(Pred[1].getReg(), RegState::ImplicitDefine);
} else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
MI.RemoveOperand(0);
@@ -1734,19 +2227,24 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
: (setLR ? PPC::BCCTRL : PPC::BCCTR)));
MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
- return true;
} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
: (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
- return true;
+ } else {
+ MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
+ : (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addImm(Pred[0].getImm())
+ .add(Pred[1]);
}
- MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
- : (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addImm(Pred[0].getImm())
- .add(Pred[1]);
+ // Need add Def and Use for LR implicit operand.
+ if (setLR)
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
+ .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
+
return true;
}
@@ -1784,8 +2282,9 @@ bool PPCInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
return false;
}
-bool PPCInstrInfo::DefinesPredicate(MachineInstr &MI,
- std::vector<MachineOperand> &Pred) const {
+bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI,
+ std::vector<MachineOperand> &Pred,
+ bool SkipDead) const {
// Note: At the present time, the contents of Pred from this function is
// unused by IfConversion. This implementation follows ARM by pushing the
// CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
@@ -2071,6 +2570,14 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (NewOpC == -1)
return false;
+ // This transformation should not be performed if `nsw` is missing and is not
+ // `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in
+ // CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in
+ // CRReg can reflect if compared values are equal, this optz is still valid.
+ if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) &&
+ Sub && !Sub->getFlag(MachineInstr::NoSWrap))
+ return false;
+
// If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
// needs to be updated to be based on SUB. Push the condition code
// operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
@@ -2221,6 +2728,112 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
return true;
}
+bool PPCInstrInfo::getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
+ const MachineOperand *BaseOp;
+ OffsetIsScalable = false;
+ if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
+ return false;
+ BaseOps.push_back(BaseOp);
+ return true;
+}
+
+static bool isLdStSafeToCluster(const MachineInstr &LdSt,
+ const TargetRegisterInfo *TRI) {
+ // If this is a volatile load/store, don't mess with it.
+ if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
+ return false;
+
+ if (LdSt.getOperand(2).isFI())
+ return true;
+
+ assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
+ // Can't cluster if the instruction modifies the base register
+ // or it is update form. e.g. ld r2,3(r2)
+ if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
+ return false;
+
+ return true;
+}
+
+// Only cluster instruction pair that have the same opcode, and they are
+// clusterable according to PowerPC specification.
+static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
+ const PPCSubtarget &Subtarget) {
+ switch (FirstOpc) {
+ default:
+ return false;
+ case PPC::STD:
+ case PPC::STFD:
+ case PPC::STXSD:
+ case PPC::DFSTOREf64:
+ return FirstOpc == SecondOpc;
+ // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with
+ // 32bit and 64bit instruction selection. They are clusterable pair though
+ // they are different opcode.
+ case PPC::STW:
+ case PPC::STW8:
+ return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
+ }
+}
+
+bool PPCInstrInfo::shouldClusterMemOps(
+ ArrayRef<const MachineOperand *> BaseOps1,
+ ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
+ unsigned NumBytes) const {
+
+ assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
+ const MachineOperand &BaseOp1 = *BaseOps1.front();
+ const MachineOperand &BaseOp2 = *BaseOps2.front();
+ assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
+ "Only base registers and frame indices are supported.");
+
+ // The NumLoads means the number of loads that has been clustered.
+ // Don't cluster memory op if there are already two ops clustered at least.
+ if (NumLoads > 2)
+ return false;
+
+ // Cluster the load/store only when they have the same base
+ // register or FI.
+ if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
+ (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
+ (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
+ return false;
+
+ // Check if the load/store are clusterable according to the PowerPC
+ // specification.
+ const MachineInstr &FirstLdSt = *BaseOp1.getParent();
+ const MachineInstr &SecondLdSt = *BaseOp2.getParent();
+ unsigned FirstOpc = FirstLdSt.getOpcode();
+ unsigned SecondOpc = SecondLdSt.getOpcode();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ // Cluster the load/store only when they have the same opcode, and they are
+ // clusterable opcode according to PowerPC specification.
+ if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
+ return false;
+
+ // Can't cluster load/store that have ordered or volatile memory reference.
+ if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
+ !isLdStSafeToCluster(SecondLdSt, TRI))
+ return false;
+
+ int64_t Offset1 = 0, Offset2 = 0;
+ unsigned Width1 = 0, Width2 = 0;
+ const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
+ if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
+ !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
+ Width1 != Width2)
+ return false;
+
+ assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
+ "getMemOperandWithOffsetWidth return incorrect base op");
+ // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
+ assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
+ return Offset1 + Width1 == Offset2;
+}
+
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///
@@ -2270,7 +2883,14 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
{MO_PLT, "ppc-plt"},
{MO_PIC_FLAG, "ppc-pic"},
{MO_PCREL_FLAG, "ppc-pcrel"},
- {MO_GOT_FLAG, "ppc-got"}};
+ {MO_GOT_FLAG, "ppc-got"},
+ {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
+ {MO_TLSGD_FLAG, "ppc-tlsgd"},
+ {MO_TLSLD_FLAG, "ppc-tlsld"},
+ {MO_TPREL_FLAG, "ppc-tprel"},
+ {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
+ {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
+ {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"}};
return makeArrayRef(TargetFlags);
}
@@ -2351,6 +2971,31 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
auto DL = MI.getDebugLoc();
switch (MI.getOpcode()) {
+ case PPC::BUILD_UACC: {
+ MCRegister ACC = MI.getOperand(0).getReg();
+ MCRegister UACC = MI.getOperand(1).getReg();
+ if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {
+ MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4;
+ MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4;
+ // FIXME: This can easily be improved to look up to the top of the MBB
+ // to see if the inputs are XXLOR's. If they are and SrcReg is killed,
+ // we can just re-target any such XXLOR's to DstVSR + offset.
+ for (int VecNo = 0; VecNo < 4; VecNo++)
+ BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo)
+ .addReg(SrcVSR + VecNo)
+ .addReg(SrcVSR + VecNo);
+ }
+ // BUILD_UACC is expanded to 4 copies of the underlying vsx regisers.
+ // So after building the 4 copies, we can replace the BUILD_UACC instruction
+ // with a NOP.
+ LLVM_FALLTHROUGH;
+ }
+ case PPC::KILL_PAIR: {
+ MI.setDesc(get(PPC::UNENCODED_NOP));
+ MI.RemoveOperand(1);
+ MI.RemoveOperand(0);
+ return true;
+ }
case TargetOpcode::LOAD_STACK_GUARD: {
assert(Subtarget.isTargetLinux() &&
"Only Linux target is expected to contain LOAD_STACK_GUARD");
@@ -2642,7 +3287,10 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
}
unsigned PPCInstrInfo::getSpillTarget() const {
- return Subtarget.hasP9Vector() ? 1 : 0;
+ // With P10, we may need to spill paired vector registers or accumulator
+ // registers. MMA implies paired vectors, so we can just check that.
+ bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
+ return IsP10Variant ? 2 : Subtarget.hasP9Vector() ? 1 : 0;
}
const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const {
@@ -2653,13 +3301,35 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const {
return LoadSpillOpcodesArray[getSpillTarget()];
}
-void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
+void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr *StartMI, MachineInstr *EndMI,
unsigned RegNo) const {
+ // Conservatively clear kill flag for the register if the instructions are in
+ // different basic blocks and in SSA form, because the kill flag may no longer
+ // be right. There is no need to bother with dead flags since defs with no
+ // uses will be handled by DCE.
+ MachineRegisterInfo &MRI = StartMI->getParent()->getParent()->getRegInfo();
+ if (MRI.isSSA() && (StartMI->getParent() != EndMI->getParent())) {
+ MRI.clearKillFlags(RegNo);
+ return;
+ }
// Instructions between [StartMI, EndMI] should be in same basic block.
- assert((StartMI.getParent() == EndMI.getParent()) &&
+ assert((StartMI->getParent() == EndMI->getParent()) &&
"Instructions are not in same basic block");
+ // If before RA, StartMI may be def through COPY, we need to adjust it to the
+ // real def. See function getForwardingDefMI.
+ if (MRI.isSSA()) {
+ bool Reads, Writes;
+ std::tie(Reads, Writes) = StartMI->readsWritesVirtualRegister(RegNo);
+ if (!Reads && !Writes) {
+ assert(Register::isVirtualRegister(RegNo) &&
+ "Must be a virtual register");
+ // Get real def and ignore copies.
+ StartMI = MRI.getVRegDef(RegNo);
+ }
+ }
+
bool IsKillSet = false;
auto clearOperandKillInfo = [=] (MachineInstr &MI, unsigned Index) {
@@ -2672,21 +3342,21 @@ void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
// Set killed flag for EndMI.
// No need to do anything if EndMI defines RegNo.
int UseIndex =
- EndMI.findRegisterUseOperandIdx(RegNo, false, &getRegisterInfo());
+ EndMI->findRegisterUseOperandIdx(RegNo, false, &getRegisterInfo());
if (UseIndex != -1) {
- EndMI.getOperand(UseIndex).setIsKill(true);
+ EndMI->getOperand(UseIndex).setIsKill(true);
IsKillSet = true;
// Clear killed flag for other EndMI operands related to RegNo. In some
// upexpected cases, killed may be set multiple times for same register
// operand in same MI.
- for (int i = 0, e = EndMI.getNumOperands(); i != e; ++i)
+ for (int i = 0, e = EndMI->getNumOperands(); i != e; ++i)
if (i != UseIndex)
- clearOperandKillInfo(EndMI, i);
+ clearOperandKillInfo(*EndMI, i);
}
// Walking the inst in reverse order (EndMI -> StartMI].
- MachineBasicBlock::reverse_iterator It = EndMI;
- MachineBasicBlock::reverse_iterator E = EndMI.getParent()->rend();
+ MachineBasicBlock::reverse_iterator It = *EndMI;
+ MachineBasicBlock::reverse_iterator E = EndMI->getParent()->rend();
// EndMI has been handled above, skip it here.
It++;
MachineOperand *MO = nullptr;
@@ -2712,13 +3382,13 @@ void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
} else if ((MO = It->findRegisterDefOperand(RegNo, false, true,
&getRegisterInfo()))) {
// No use found, set dead for its def.
- assert(&*It == &StartMI && "No new def between StartMI and EndMI.");
+ assert(&*It == StartMI && "No new def between StartMI and EndMI.");
MO->setIsDead(true);
break;
}
}
- if ((&*It) == &StartMI)
+ if ((&*It) == StartMI)
break;
}
// Ensure RegMo liveness is killed after EndMI.
@@ -3011,6 +3681,143 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
return false;
}
+bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
+ MachineInstr **ToErase) const {
+ MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
+ unsigned FoldingReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(FoldingReg))
+ return false;
+ MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
+ if (SrcMI->getOpcode() != PPC::RLWINM &&
+ SrcMI->getOpcode() != PPC::RLWINM_rec &&
+ SrcMI->getOpcode() != PPC::RLWINM8 &&
+ SrcMI->getOpcode() != PPC::RLWINM8_rec)
+ return false;
+ assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
+ MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
+ SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
+ "Invalid PPC::RLWINM Instruction!");
+ uint64_t SHSrc = SrcMI->getOperand(2).getImm();
+ uint64_t SHMI = MI.getOperand(2).getImm();
+ uint64_t MBSrc = SrcMI->getOperand(3).getImm();
+ uint64_t MBMI = MI.getOperand(3).getImm();
+ uint64_t MESrc = SrcMI->getOperand(4).getImm();
+ uint64_t MEMI = MI.getOperand(4).getImm();
+
+ assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
+ "Invalid PPC::RLWINM Instruction!");
+ // If MBMI is bigger than MEMI, we always can not get run of ones.
+ // RotatedSrcMask non-wrap:
+ // 0........31|32........63
+ // RotatedSrcMask: B---E B---E
+ // MaskMI: -----------|--E B------
+ // Result: ----- --- (Bad candidate)
+ //
+ // RotatedSrcMask wrap:
+ // 0........31|32........63
+ // RotatedSrcMask: --E B----|--E B----
+ // MaskMI: -----------|--E B------
+ // Result: --- -----|--- ----- (Bad candidate)
+ //
+ // One special case is RotatedSrcMask is a full set mask.
+ // RotatedSrcMask full:
+ // 0........31|32........63
+ // RotatedSrcMask: ------EB---|-------EB---
+ // MaskMI: -----------|--E B------
+ // Result: -----------|--- ------- (Good candidate)
+
+ // Mark special case.
+ bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
+
+ // For other MBMI > MEMI cases, just return.
+ if ((MBMI > MEMI) && !SrcMaskFull)
+ return false;
+
+ // Handle MBMI <= MEMI cases.
+ APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
+ // In MI, we only need low 32 bits of SrcMI, just consider about low 32
+ // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0,
+ // while in PowerPC ISA, lowerest bit is at index 63.
+ APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
+
+ APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
+ APInt FinalMask = RotatedSrcMask & MaskMI;
+ uint32_t NewMB, NewME;
+ bool Simplified = false;
+
+ // If final mask is 0, MI result should be 0 too.
+ if (FinalMask.isNullValue()) {
+ bool Is64Bit =
+ (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
+ Simplified = true;
+ LLVM_DEBUG(dbgs() << "Replace Instr: ");
+ LLVM_DEBUG(MI.dump());
+
+ if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
+ // Replace MI with "LI 0"
+ MI.RemoveOperand(4);
+ MI.RemoveOperand(3);
+ MI.RemoveOperand(2);
+ MI.getOperand(1).ChangeToImmediate(0);
+ MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
+ } else {
+ // Replace MI with "ANDI_rec reg, 0"
+ MI.RemoveOperand(4);
+ MI.RemoveOperand(3);
+ MI.getOperand(2).setImm(0);
+ MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
+ MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+ if (SrcMI->getOperand(1).isKill()) {
+ MI.getOperand(1).setIsKill(true);
+ SrcMI->getOperand(1).setIsKill(false);
+ } else
+ // About to replace MI.getOperand(1), clear its kill flag.
+ MI.getOperand(1).setIsKill(false);
+ }
+
+ LLVM_DEBUG(dbgs() << "With: ");
+ LLVM_DEBUG(MI.dump());
+
+ } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
+ NewMB <= NewME) ||
+ SrcMaskFull) {
+ // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger
+ // than NewME. Otherwise we get a 64 bit value after folding, but MI
+ // return a 32 bit value.
+ Simplified = true;
+ LLVM_DEBUG(dbgs() << "Converting Instr: ");
+ LLVM_DEBUG(MI.dump());
+
+ uint16_t NewSH = (SHSrc + SHMI) % 32;
+ MI.getOperand(2).setImm(NewSH);
+ // If SrcMI mask is full, no need to update MBMI and MEMI.
+ if (!SrcMaskFull) {
+ MI.getOperand(3).setImm(NewMB);
+ MI.getOperand(4).setImm(NewME);
+ }
+ MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+ if (SrcMI->getOperand(1).isKill()) {
+ MI.getOperand(1).setIsKill(true);
+ SrcMI->getOperand(1).setIsKill(false);
+ } else
+ // About to replace MI.getOperand(1), clear its kill flag.
+ MI.getOperand(1).setIsKill(false);
+
+ LLVM_DEBUG(dbgs() << "To: ");
+ LLVM_DEBUG(MI.dump());
+ }
+ if (Simplified & MRI->use_nodbg_empty(FoldingReg) &&
+ !SrcMI->hasImplicitDef()) {
+ // If FoldingReg has no non-debug use and it has no implicit def (it
+ // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
+ // Otherwise keep it.
+ *ToErase = SrcMI;
+ LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
+ LLVM_DEBUG(SrcMI->dump());
+ }
+ return Simplified;
+}
+
bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
ImmInstrInfo &III, bool PostRA) const {
// The vast majority of the instructions would need their operand 2 replaced
@@ -3732,6 +4539,20 @@ bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
}
return false;
}
+ case PPC::SUBFIC:
+ case PPC::SUBFIC8: {
+ // Only transform this if the CARRY implicit operand is dead.
+ if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead())
+ return false;
+ int64_t Minuend = MI.getOperand(2).getImm();
+ if (isInt<16>(Minuend - SExtImm)) {
+ ReplaceWithLI = true;
+ Is64BitLI = Opc == PPC::SUBFIC8;
+ NewImm = Minuend - SExtImm;
+ break;
+ }
+ return false;
+ }
case PPC::RLDICL:
case PPC::RLDICL_rec:
case PPC::RLDICL_32:
@@ -3849,7 +4670,7 @@ bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
// ForwardingOperandReg = LI imm1
// y = op2 imm2, ForwardingOperandReg(killed)
if (IsForwardingOperandKilled)
- fixupIsDeadOrKill(DefMI, MI, ForwardingOperandReg);
+ fixupIsDeadOrKill(&DefMI, &MI, ForwardingOperandReg);
LLVM_DEBUG(dbgs() << "With:\n");
LLVM_DEBUG(MI.dump());
@@ -3941,9 +4762,9 @@ bool PPCInstrInfo::transformToNewImmFormFedByAdd(
// Update kill flag
if (RegMO->isKill() || IsKilledFor(RegMO->getReg()))
- fixupIsDeadOrKill(DefMI, MI, RegMO->getReg());
+ fixupIsDeadOrKill(&DefMI, &MI, RegMO->getReg());
if (ForwardKilledOperandReg != ~0U)
- fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+ fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);
}
LLVM_DEBUG(dbgs() << "With:\n");
@@ -4054,12 +4875,12 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(
// x = ADD reg(killed), imm
// y = XOP 0, x
if (IsFwdFeederRegKilled || RegMO->isKill())
- fixupIsDeadOrKill(DefMI, MI, RegMO->getReg());
+ fixupIsDeadOrKill(&DefMI, &MI, RegMO->getReg());
// Pattern 3:
// ForwardKilledOperandReg = ADD reg, imm
// y = XOP 0, ForwardKilledOperandReg(killed)
if (ForwardKilledOperandReg != ~0U)
- fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+ fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);
LLVM_DEBUG(dbgs() << "With:\n");
LLVM_DEBUG(MI.dump());
@@ -4215,7 +5036,7 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
// ForwardKilledOperandReg = LI imm
// y = XOP reg, ForwardKilledOperandReg(killed)
if (ForwardKilledOperandReg != ~0U)
- fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+ fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);
return true;
}
@@ -4618,13 +5439,15 @@ MachineInstr *PPCInstrInfo::findLoopInstr(
bool PPCInstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
unsigned &Width, const TargetRegisterInfo *TRI) const {
- if (!LdSt.mayLoadOrStore())
+ if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3)
return false;
// Handle only loads/stores with base register followed by immediate offset.
- if (LdSt.getNumExplicitOperands() != 3)
+ if (!LdSt.getOperand(1).isImm() ||
+ (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
return false;
- if (!LdSt.getOperand(1).isImm() || !LdSt.getOperand(2).isReg())
+ if (!LdSt.getOperand(1).isImm() ||
+ (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
return false;
if (!LdSt.hasOneMemOperand())
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index d98597f48340..c6ef1742b722 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -122,61 +122,73 @@ enum SpillOpcodeKey {
SOK_VSXVectorSpill,
SOK_VectorFloat8Spill,
SOK_VectorFloat4Spill,
- SOK_VRSaveSpill,
- SOK_QuadFloat8Spill,
- SOK_QuadFloat4Spill,
- SOK_QuadBitSpill,
SOK_SpillToVSR,
+ SOK_PairedVecSpill,
+ SOK_AccumulatorSpill,
+ SOK_UAccumulatorSpill,
SOK_SPESpill,
SOK_LastOpcodeSpill // This must be last on the enum.
};
// Define list of load and store spill opcodes.
+#define NoInstr PPC::INSTRUCTION_LIST_END
#define Pwr8LoadOpcodes \
{ \
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, \
- PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb, \
- PPC::SPILLTOVSR_LD, PPC::EVLDD \
+ PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, PPC::EVLDD \
}
#define Pwr9LoadOpcodes \
{ \
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \
- PPC::DFLOADf32, PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, \
- PPC::QVLFDXb, PPC::SPILLTOVSR_LD \
+ PPC::DFLOADf32, PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, NoInstr \
+ }
+
+#define Pwr10LoadOpcodes \
+ { \
+ PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
+ PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \
+ PPC::DFLOADf32, PPC::SPILLTOVSR_LD, PPC::LXVP, PPC::RESTORE_ACC, \
+ PPC::RESTORE_UACC, NoInstr \
}
#define Pwr8StoreOpcodes \
{ \
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
- PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, PPC::SPILL_VRSAVE, \
- PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, PPC::SPILLTOVSR_ST, \
- PPC::EVSTDD \
+ PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, \
+ PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, PPC::EVSTDD \
}
#define Pwr9StoreOpcodes \
{ \
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \
- PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, \
- PPC::SPILLTOVSR_ST \
+ PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, NoInstr \
+ }
+
+#define Pwr10StoreOpcodes \
+ { \
+ PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
+ PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \
+ PPC::SPILLTOVSR_ST, PPC::STXVP, PPC::SPILL_ACC, PPC::SPILL_UACC, \
+ NoInstr \
}
// Initialize arrays for load and store spill opcodes on supported subtargets.
#define StoreOpcodesForSpill \
- { Pwr8StoreOpcodes, Pwr9StoreOpcodes }
+ { Pwr8StoreOpcodes, Pwr9StoreOpcodes, Pwr10StoreOpcodes }
#define LoadOpcodesForSpill \
- { Pwr8LoadOpcodes, Pwr9LoadOpcodes }
+ { Pwr8LoadOpcodes, Pwr9LoadOpcodes, Pwr10LoadOpcodes }
class PPCSubtarget;
class PPCInstrInfo : public PPCGenInstrInfo {
PPCSubtarget &Subtarget;
const PPCRegisterInfo RI;
- const unsigned StoreSpillOpcodesArray[2][SOK_LastOpcodeSpill] =
+ const unsigned StoreSpillOpcodesArray[3][SOK_LastOpcodeSpill] =
StoreOpcodesForSpill;
- const unsigned LoadSpillOpcodesArray[2][SOK_LastOpcodeSpill] =
+ const unsigned LoadSpillOpcodesArray[3][SOK_LastOpcodeSpill] =
LoadOpcodesForSpill;
void StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill,
@@ -234,11 +246,17 @@ class PPCInstrInfo : public PPCGenInstrInfo {
unsigned getSpillTarget() const;
const unsigned *getStoreOpcodesForSpillArray() const;
const unsigned *getLoadOpcodesForSpillArray() const;
+ unsigned getSpillIndex(const TargetRegisterClass *RC) const;
int16_t getFMAOpIdxInfo(unsigned Opcode) const;
void reassociateFMA(MachineInstr &Root, MachineCombinerPattern Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
+ bool isLoadFromConstantPool(MachineInstr *I) const;
+ Register
+ generateLoadForNewConst(unsigned Idx, MachineInstr *MI, Type *Ty,
+ SmallVectorImpl<MachineInstr *> &InsInstrs) const;
+ const Constant *getConstantFromConstantPool(MachineInstr *I) const;
virtual void anchor();
protected:
@@ -273,10 +291,10 @@ public:
}
static bool isSameClassPhysRegCopy(unsigned Opcode) {
- unsigned CopyOpcodes[] =
- { PPC::OR, PPC::OR8, PPC::FMR, PPC::VOR, PPC::XXLOR, PPC::XXLORf,
- PPC::XSCPSGNDP, PPC::MCRF, PPC::QVFMR, PPC::QVFMRs, PPC::QVFMRb,
- PPC::CROR, PPC::EVOR, -1U };
+ unsigned CopyOpcodes[] = {PPC::OR, PPC::OR8, PPC::FMR,
+ PPC::VOR, PPC::XXLOR, PPC::XXLORf,
+ PPC::XSCPSGNDP, PPC::MCRF, PPC::CROR,
+ PPC::EVOR, -1U};
for (int i = 0; CopyOpcodes[i] != -1U; i++)
if (Opcode == CopyOpcodes[i])
return true;
@@ -330,14 +348,29 @@ public:
/// chain ending in \p Root. All potential patterns are output in the \p
/// P array.
bool getFMAPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &P) const;
+ SmallVectorImpl<MachineCombinerPattern> &P,
+ bool DoRegPressureReduce) const;
/// Return true when there is potentially a faster code sequence
/// for an instruction chain ending in <Root>. All potential patterns are
/// output in the <Pattern> array.
- bool getMachineCombinerPatterns(
- MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &P) const override;
+ bool getMachineCombinerPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &P,
+ bool DoRegPressureReduce) const override;
+
+ /// On PowerPC, we leverage machine combiner pass to reduce register pressure
+ /// when the register pressure is high for one BB.
+ /// Return true if register pressure for \p MBB is high and ABI is supported
+ /// to reduce register pressure. Otherwise return false.
+ bool
+ shouldReduceRegisterPressure(MachineBasicBlock *MBB,
+ RegisterClassInfo *RegClassInfo) const override;
+
+ /// Fixup the placeholders we put in genAlternativeCodeSequence() for
+ /// MachineCombiner.
+ void
+ finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P,
+ SmallVectorImpl<MachineInstr *> &InsInstrs) const override;
bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
@@ -470,14 +503,18 @@ public:
// Predication support.
bool isPredicated(const MachineInstr &MI) const override;
+ bool isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const override;
+
bool PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const override;
bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
ArrayRef<MachineOperand> Pred2) const override;
- bool DefinesPredicate(MachineInstr &MI,
- std::vector<MachineOperand> &Pred) const override;
+ bool ClobbersPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred,
+ bool SkipDead) const override;
// Comparison optimization.
@@ -497,6 +534,20 @@ public:
int64_t &Offset, unsigned &Width,
const TargetRegisterInfo *TRI) const;
+ /// Get the base operand and byte offset of an instruction that reads/writes
+ /// memory.
+ bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt,
+ SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+ bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const override;
+
+ /// Returns true if the two given memory operations should be scheduled
+ /// adjacent.
+ bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+ ArrayRef<const MachineOperand *> BaseOps2,
+ unsigned NumLoads, unsigned NumBytes) const override;
+
/// Return true if two MIs access different memory addresses and false
/// otherwise
bool
@@ -554,6 +605,7 @@ public:
bool convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef = nullptr) const;
bool foldFrameOffset(MachineInstr &MI) const;
+ bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase = nullptr) const;
bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const;
bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const;
bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg,
@@ -565,15 +617,21 @@ public:
int64_t OffsetImm) const;
/// Fixup killed/dead flag for register \p RegNo between instructions [\p
- /// StartMI, \p EndMI]. Some PostRA transformations may violate register
- /// killed/dead flags semantics, this function can be called to fix up. Before
- /// calling this function,
+ /// StartMI, \p EndMI]. Some pre-RA or post-RA transformations may violate
+ /// register killed/dead flags semantics, this function can be called to fix
+ /// up. Before calling this function,
/// 1. Ensure that \p RegNo liveness is killed after instruction \p EndMI.
/// 2. Ensure that there is no new definition between (\p StartMI, \p EndMI)
- /// and possible definition for \p RegNo is \p StartMI or \p EndMI.
- /// 3. Ensure that all instructions between [\p StartMI, \p EndMI] are in same
- /// basic block.
- void fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
+ /// and possible definition for \p RegNo is \p StartMI or \p EndMI. For
+ /// pre-RA cases, definition may be \p StartMI through COPY, \p StartMI
+ /// will be adjust to true definition.
+ /// 3. We can do accurate fixup for the case when all instructions between
+ /// [\p StartMI, \p EndMI] are in same basic block.
+ /// 4. For the case when \p StartMI and \p EndMI are not in same basic block,
+ /// we conservatively clear kill flag for all uses of \p RegNo for pre-RA
+ /// and for post-RA, we give an assertion as without reaching definition
+ /// analysis post-RA, \p StartMI and \p EndMI are hard to keep right.
+ void fixupIsDeadOrKill(MachineInstr *StartMI, MachineInstr *EndMI,
unsigned RegNo) const;
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 673ab63039cf..724af23542d7 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -74,6 +74,9 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [
SDTCisVT<0, i32>, SDTCisVT<2, OtherVT>
]>;
+def SDT_PPCFtsqrt : SDTypeProfile<1, 1, [
+ SDTCisVT<0, i32>]>;
+
def SDT_PPClbrx : SDTypeProfile<1, 2, [
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
@@ -124,6 +127,8 @@ def SDT_PPCFPMinMax : SDTypeProfile<1, 2, [
def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>;
def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>;
+def PPCfsqrt : SDNode<"PPCISD::FSQRT", SDTFPUnaryOp, []>;
+def PPCftsqrt : SDNode<"PPCISD::FTSQRT", SDT_PPCFtsqrt,[]>;
def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>;
def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>;
@@ -134,6 +139,28 @@ def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>;
def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>;
+def PPCstrict_fcfid : SDNode<"PPCISD::STRICT_FCFID",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def PPCstrict_fcfidu : SDNode<"PPCISD::STRICT_FCFIDU",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def PPCstrict_fcfids : SDNode<"PPCISD::STRICT_FCFIDS",
+ SDTFPRoundOp, [SDNPHasChain]>;
+def PPCstrict_fcfidus : SDNode<"PPCISD::STRICT_FCFIDUS",
+ SDTFPRoundOp, [SDNPHasChain]>;
+
+def PPCany_fcfid : PatFrags<(ops node:$op),
+ [(PPCfcfid node:$op),
+ (PPCstrict_fcfid node:$op)]>;
+def PPCany_fcfidu : PatFrags<(ops node:$op),
+ [(PPCfcfidu node:$op),
+ (PPCstrict_fcfidu node:$op)]>;
+def PPCany_fcfids : PatFrags<(ops node:$op),
+ [(PPCfcfids node:$op),
+ (PPCstrict_fcfids node:$op)]>;
+def PPCany_fcfidus : PatFrags<(ops node:$op),
+ [(PPCfcfidus node:$op),
+ (PPCstrict_fcfidus node:$op)]>;
+
def PPCcv_fp_to_uint_in_vsr:
SDNode<"PPCISD::FP_TO_UINT_IN_VSR", SDT_PPCcv_fp_to_int, []>;
def PPCcv_fp_to_sint_in_vsr:
@@ -160,7 +187,12 @@ def PPCmffs : SDNode<"PPCISD::MFFS",
// Perform FADD in round-to-zero mode.
def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
+def PPCstrict_faddrtz: SDNode<"PPCISD::STRICT_FADDRTZ", SDTFPBinOp,
+ [SDNPHasChain]>;
+def PPCany_faddrtz: PatFrags<(ops node:$lhs, node:$rhs),
+ [(PPCfaddrtz node:$lhs, node:$rhs),
+ (PPCstrict_faddrtz node:$lhs, node:$rhs)]>;
def PPCfsel : SDNode<"PPCISD::FSEL",
// Type constraint for fsel.
@@ -195,6 +227,7 @@ def PPCaddiTlsldLAddr : SDNode<"PPCISD::ADDI_TLSLD_L_ADDR",
SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>;
def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
+def PPCpaddiDtprel : SDNode<"PPCISD::PADDI_DTPREL", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
@@ -203,16 +236,6 @@ def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>;
def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
-def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
-def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>;
-def PPCqvaligni : SDNode<"PPCISD::QVALIGNI", SDT_PPCqvaligni, []>;
-def PPCqvesplati : SDNode<"PPCISD::QVESPLATI", SDT_PPCqvesplati, []>;
-
-def PPCqbflt : SDNode<"PPCISD::QBFLT", SDT_PPCqbflt, []>;
-
-def PPCqvlfsb : SDNode<"PPCISD::QVLFSb", SDT_PPCqvlfsb,
- [SDNPHasChain, SDNPMayLoad]>;
-
def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>;
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
@@ -225,6 +248,28 @@ def PPCfnmsub : SDNode<"PPCISD::FNMSUB" , SDTFPTernaryOp>;
def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>;
+def PPCstrict_fctidz : SDNode<"PPCISD::STRICT_FCTIDZ",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def PPCstrict_fctiwz : SDNode<"PPCISD::STRICT_FCTIWZ",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def PPCstrict_fctiduz : SDNode<"PPCISD::STRICT_FCTIDUZ",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def PPCstrict_fctiwuz : SDNode<"PPCISD::STRICT_FCTIWUZ",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+
+def PPCany_fctidz : PatFrags<(ops node:$op),
+ [(PPCstrict_fctidz node:$op),
+ (PPCfctidz node:$op)]>;
+def PPCany_fctiwz : PatFrags<(ops node:$op),
+ [(PPCstrict_fctiwz node:$op),
+ (PPCfctiwz node:$op)]>;
+def PPCany_fctiduz : PatFrags<(ops node:$op),
+ [(PPCstrict_fctiduz node:$op),
+ (PPCfctiduz node:$op)]>;
+def PPCany_fctiwuz : PatFrags<(ops node:$op),
+ [(PPCstrict_fctiwuz node:$op),
+ (PPCfctiwuz node:$op)]>;
+
// Move 2 i64 values into a VSX register
def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128",
SDTypeProfile<1, 2,
@@ -295,7 +340,7 @@ def PPCrfebb : SDNode<"PPCISD::RFEBB", SDT_PPCsc,
[SDNPHasChain, SDNPSideEffect]>;
def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
-def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
+def PPCvcmp_rec : SDNode<"PPCISD::VCMP_rec", SDT_PPCvcmp, [SDNPOutGlue]>;
def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
[SDNPHasChain, SDNPOptInGlue]>;
@@ -327,6 +372,10 @@ def PPCprobedalloca : SDNode<"PPCISD::PROBED_ALLOCA", SDTDynOp, [SDNPHasChain]>;
// PC Relative Specific Nodes
def PPCmatpcreladdr : SDNode<"PPCISD::MAT_PCREL_ADDR", SDTIntUnaryOp, []>;
+def PPCtlsdynamatpcreladdr : SDNode<"PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR",
+ SDTIntUnaryOp, []>;
+def PPCtlslocalexecmataddr : SDNode<"PPCISD::TLS_LOCAL_EXEC_MAT_ADDR",
+ SDTIntUnaryOp, []>;
//===----------------------------------------------------------------------===//
// PowerPC specific transformation functions and pattern fragments.
@@ -446,37 +495,41 @@ def imm64ZExt32 : Operand<i64>, ImmLeaf<i64, [{
return isUInt<32>(Imm);
}]>;
-// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
+// This is a somewhat weaker condition than actually checking for 4-byte
+// alignment. It is simply checking that the displacement can be represented
+// as an immediate that is a multiple of 4 (i.e. the requirements for DS-Form
+// instructions).
+// But some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
// restricted memrix (4-aligned) constants are alignment sensitive. If these
// offsets are hidden behind TOC entries than the values of the lower-order
// bits cannot be checked directly. As a result, we need to also incorporate
// an alignment check into the relevant patterns.
-def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return cast<LoadSDNode>(N)->getAlignment() >= 4;
+def DSFormLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isOffsetMultipleOf(N, 4) || cast<LoadSDNode>(N)->getAlignment() >= 4;
}]>;
-def aligned4store : PatFrag<(ops node:$val, node:$ptr),
+def DSFormStore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getAlignment() >= 4;
+ return isOffsetMultipleOf(N, 4) || cast<StoreSDNode>(N)->getAlignment() >= 4;
}]>;
-def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
- return cast<LoadSDNode>(N)->getAlignment() >= 4;
+def DSFormSextLoadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return isOffsetMultipleOf(N, 4) || cast<LoadSDNode>(N)->getAlignment() >= 4;
}]>;
-def aligned4pre_store : PatFrag<
+def DSFormPreStore : PatFrag<
(ops node:$val, node:$base, node:$offset),
(pre_store node:$val, node:$base, node:$offset), [{
- return cast<StoreSDNode>(N)->getAlignment() >= 4;
+ return isOffsetMultipleOf(N, 4) || cast<StoreSDNode>(N)->getAlignment() >= 4;
}]>;
-def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return cast<LoadSDNode>(N)->getAlignment() < 4;
+def NonDSFormLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4);
}]>;
-def unaligned4store : PatFrag<(ops node:$val, node:$ptr),
+def NonDSFormStore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getAlignment() < 4;
+ return cast<StoreSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4);
}]>;
-def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
- return cast<LoadSDNode>(N)->getAlignment() < 4;
+def NonDSFormSextLoadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4);
}]>;
// This is a somewhat weaker condition than actually checking for 16-byte
@@ -617,6 +670,7 @@ def PPCU1ImmAsmOperand : AsmOperandClass {
def u1imm : Operand<i32> {
let PrintMethod = "printU1ImmOperand";
let ParserMatchClass = PPCU1ImmAsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU2ImmAsmOperand : AsmOperandClass {
@@ -626,6 +680,7 @@ def PPCU2ImmAsmOperand : AsmOperandClass {
def u2imm : Operand<i32> {
let PrintMethod = "printU2ImmOperand";
let ParserMatchClass = PPCU2ImmAsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCATBitsAsHintAsmOperand : AsmOperandClass {
@@ -635,6 +690,7 @@ def PPCATBitsAsHintAsmOperand : AsmOperandClass {
def atimm : Operand<i32> {
let PrintMethod = "printATBitsAsHint";
let ParserMatchClass = PPCATBitsAsHintAsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU3ImmAsmOperand : AsmOperandClass {
@@ -644,6 +700,7 @@ def PPCU3ImmAsmOperand : AsmOperandClass {
def u3imm : Operand<i32> {
let PrintMethod = "printU3ImmOperand";
let ParserMatchClass = PPCU3ImmAsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU4ImmAsmOperand : AsmOperandClass {
@@ -653,6 +710,7 @@ def PPCU4ImmAsmOperand : AsmOperandClass {
def u4imm : Operand<i32> {
let PrintMethod = "printU4ImmOperand";
let ParserMatchClass = PPCU4ImmAsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCS5ImmAsmOperand : AsmOperandClass {
let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
@@ -662,6 +720,7 @@ def s5imm : Operand<i32> {
let PrintMethod = "printS5ImmOperand";
let ParserMatchClass = PPCS5ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<5>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU5ImmAsmOperand : AsmOperandClass {
let Name = "U5Imm"; let PredicateMethod = "isU5Imm";
@@ -671,6 +730,7 @@ def u5imm : Operand<i32> {
let PrintMethod = "printU5ImmOperand";
let ParserMatchClass = PPCU5ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<5>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU6ImmAsmOperand : AsmOperandClass {
let Name = "U6Imm"; let PredicateMethod = "isU6Imm";
@@ -680,6 +740,7 @@ def u6imm : Operand<i32> {
let PrintMethod = "printU6ImmOperand";
let ParserMatchClass = PPCU6ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<6>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU7ImmAsmOperand : AsmOperandClass {
let Name = "U7Imm"; let PredicateMethod = "isU7Imm";
@@ -689,6 +750,7 @@ def u7imm : Operand<i32> {
let PrintMethod = "printU7ImmOperand";
let ParserMatchClass = PPCU7ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<7>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU8ImmAsmOperand : AsmOperandClass {
let Name = "U8Imm"; let PredicateMethod = "isU8Imm";
@@ -698,6 +760,7 @@ def u8imm : Operand<i32> {
let PrintMethod = "printU8ImmOperand";
let ParserMatchClass = PPCU8ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<8>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU10ImmAsmOperand : AsmOperandClass {
let Name = "U10Imm"; let PredicateMethod = "isU10Imm";
@@ -707,6 +770,7 @@ def u10imm : Operand<i32> {
let PrintMethod = "printU10ImmOperand";
let ParserMatchClass = PPCU10ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<10>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU12ImmAsmOperand : AsmOperandClass {
let Name = "U12Imm"; let PredicateMethod = "isU12Imm";
@@ -716,6 +780,7 @@ def u12imm : Operand<i32> {
let PrintMethod = "printU12ImmOperand";
let ParserMatchClass = PPCU12ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<12>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCS16ImmAsmOperand : AsmOperandClass {
let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
@@ -726,6 +791,7 @@ def s16imm : Operand<i32> {
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS16ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU16ImmAsmOperand : AsmOperandClass {
let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
@@ -736,6 +802,7 @@ def u16imm : Operand<i32> {
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCU16ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCS17ImmAsmOperand : AsmOperandClass {
let Name = "S17Imm"; let PredicateMethod = "isS17Imm";
@@ -749,6 +816,7 @@ def s17imm : Operand<i32> {
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS17ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCS34ImmAsmOperand : AsmOperandClass {
let Name = "S34Imm";
@@ -757,9 +825,17 @@ def PPCS34ImmAsmOperand : AsmOperandClass {
}
def s34imm : Operand<i64> {
let PrintMethod = "printS34ImmOperand";
- let EncoderMethod = "getImm34Encoding";
+ let EncoderMethod = "getImm34EncodingNoPCRel";
let ParserMatchClass = PPCS34ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<34>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def s34imm_pcrel : Operand<i64> {
+ let PrintMethod = "printS34ImmOperand";
+ let EncoderMethod = "getImm34EncodingPCRel";
+ let ParserMatchClass = PPCS34ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<34>";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def PPCImmZeroAsmOperand : AsmOperandClass {
let Name = "ImmZero";
@@ -770,6 +846,7 @@ def immZero : Operand<i32> {
let PrintMethod = "printImmZeroOperand";
let ParserMatchClass = PPCImmZeroAsmOperand;
let DecoderMethod = "decodeImmZeroOperand";
+ let OperandType = "OPERAND_IMMEDIATE";
}
def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
@@ -915,40 +992,47 @@ def memri : Operand<iPTR> {
let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIEncoding";
let DecoderMethod = "decodeMemRIOperands";
+ let OperandType = "OPERAND_MEMORY";
}
def memrr : Operand<iPTR> {
let PrintMethod = "printMemRegReg";
let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg);
+ let OperandType = "OPERAND_MEMORY";
}
def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIXEncoding";
let DecoderMethod = "decodeMemRIXOperands";
+ let OperandType = "OPERAND_MEMORY";
}
def memrix16 : Operand<iPTR> { // memri, imm is 16-aligned, 12-bit, Inst{16:27}
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispRIX16:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIX16Encoding";
let DecoderMethod = "decodeMemRIX16Operands";
+ let OperandType = "OPERAND_MEMORY";
}
def spe8dis : Operand<iPTR> { // SPE displacement where the imm is 8-aligned.
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispSPE8:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getSPE8DisEncoding";
let DecoderMethod = "decodeSPE8Operands";
+ let OperandType = "OPERAND_MEMORY";
}
def spe4dis : Operand<iPTR> { // SPE displacement where the imm is 4-aligned.
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispSPE4:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getSPE4DisEncoding";
let DecoderMethod = "decodeSPE4Operands";
+ let OperandType = "OPERAND_MEMORY";
}
def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispSPE2:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getSPE2DisEncoding";
let DecoderMethod = "decodeSPE2Operands";
+ let OperandType = "OPERAND_MEMORY";
}
// A single-register address. This is used with the SjLj
@@ -956,6 +1040,7 @@ def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
// G8RC_NOX0 registers.
def memr : Operand<iPTR> {
let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg);
+ let OperandType = "OPERAND_MEMORY";
}
def PPCTLSRegOperand : AsmOperandClass {
let Name = "TLSReg"; let PredicateMethod = "isTLSReg";
@@ -981,11 +1066,13 @@ def pred : Operand<OtherVT> {
// Define PowerPC specific addressing mode.
// d-form
-def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>; // "stb"
+def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>; // "stb"
// ds-form
-def iaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
+def iaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
// dq-form
-def iaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
+def iaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
+// 8LS:d-form
+def iaddrX34 : ComplexPattern<iPTR, 2, "SelectAddrImmX34", [], []>; // "pstxvp"
// Below forms are all x-form addressing mode, use three different ones so we
// can make a accurate check for x-form instructions in ISEL.
@@ -1031,6 +1118,11 @@ def HasExtDiv : Predicate<"Subtarget->hasExtDiv()">;
def IsISA3_0 : Predicate<"Subtarget->isISA3_0()">;
def HasFPU : Predicate<"Subtarget->hasFPU()">;
def PCRelativeMemops : Predicate<"Subtarget->hasPCRelativeMemops()">;
+def IsNotISA3_1 : Predicate<"!Subtarget->isISA3_1()">;
+
+// AIX assembler may not be modern enough to support some extended mne.
+def ModernAs: Predicate<"!Subtarget->isAIXABI() || Subtarget->HasModernAIXAs">,
+ AssemblerPredicate<(any_of (not AIXOS), FeatureModernAIXAs)>;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
@@ -1389,10 +1481,7 @@ def ADJCALLSTACKUP : PPCEmitTimePseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2
"#ADJCALLSTACKUP $amt1 $amt2",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
-
-def UPDATE_VRSAVE : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$rS),
- "UPDATE_VRSAVE $rD, $rS", []>;
-}
+} // hasCtrlDep
let Defs = [R1], Uses = [R1] in
def DYNALLOC : PPCEmitTimePseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC",
@@ -1406,9 +1495,14 @@ def PROBED_ALLOCA_32 : PPCCustomInserterPseudo<(outs gprc:$result),
(ins gprc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_32",
[(set i32:$result,
(PPCprobedalloca i32:$negsize, iaddr:$fpsi))]>;
-def PREPARE_PROBED_ALLOCA_32 : PPCEmitTimePseudo<(outs gprc:$fp,
- gprc:$sp),
+def PREPARE_PROBED_ALLOCA_32 : PPCEmitTimePseudo<(outs
+ gprc:$fp, gprc:$actual_negsize),
(ins gprc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_32", []>;
+def PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32 : PPCEmitTimePseudo<(outs
+ gprc:$fp, gprc:$actual_negsize),
+ (ins gprc:$negsize, memri:$fpsi),
+ "#PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32", []>,
+ RegConstraint<"$actual_negsize = $negsize">;
def PROBED_STACKALLOC_32 : PPCEmitTimePseudo<(outs gprc:$scratch, gprc:$temp),
(ins i64imm:$stacksize),
"#PROBED_STACKALLOC_32", []>;
@@ -1513,6 +1607,9 @@ def SETRNDi : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins u2imm:$RND),
def SETRND : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins gprc:$in),
"#SETRND", [(set f64:$FRT, (int_ppc_setrnd gprc :$in))]>;
+
+def SETFLM : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FLM),
+ "#SETFLM", [(set f64:$FRT, (int_ppc_setflm f8rc:$FLM))]>;
}
let Defs = [LR] in
@@ -1562,11 +1659,12 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
def BCn : BForm_4<16, 4, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst),
"bc 4, $bi, $dst">;
- let isReturn = 1, Uses = [LR, RM] in
+ let isReturn = 1, Uses = [LR, RM] in {
def BCLR : XLForm_2_br2<19, 16, 12, 0, (outs), (ins crbitrc:$bi),
"bclr 12, $bi, 0", IIC_BrB, []>;
def BCLRn : XLForm_2_br2<19, 16, 4, 0, (outs), (ins crbitrc:$bi),
"bclr 4, $bi, 0", IIC_BrB, []>;
+ }
}
let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in {
@@ -1838,7 +1936,7 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst",
IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBF : DCB_Form_hint<86, (outs), (ins u5imm:$TH, memrr:$dst),
+def DCBF : DCB_Form_hint<86, (outs), (ins u3imm:$TH, memrr:$dst),
"dcbf $dst, $TH", IIC_LdStDCBF, []>,
PPC970_DGroup_Single;
@@ -2373,7 +2471,7 @@ let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
"stmw $rS, $dst", IIC_LdStLMW, []>;
-def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L),
+def SYNC : XForm_24_sync<31, 598, (outs), (ins u2imm:$L),
"sync $L", IIC_LdStSync, []>;
let isCodeGenOnly = 1 in {
@@ -2568,37 +2666,26 @@ let isCompare = 1, hasSideEffects = 0 in {
}
}
let PPC970_Unit = 3, Predicates = [HasFPU] in { // FPU Operations.
-//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
-// "fcmpo $crD, $fA, $fB", IIC_FPCompare>;
-let isCompare = 1, hasSideEffects = 0 in {
+let isCompare = 1, mayRaiseFPException = 1, hasSideEffects = 0 in {
def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB),
"fcmpu $crD, $fA, $fB", IIC_FPCompare>;
- let Interpretation64Bit = 1, isCodeGenOnly = 1 in
- def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
- "fcmpu $crD, $fA, $fB", IIC_FPCompare>;
+ def FCMPOS : XForm_17<63, 32, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB),
+ "fcmpo $crD, $fA, $fB", IIC_FPCompare>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
+ "fcmpu $crD, $fA, $fB", IIC_FPCompare>;
+ def FCMPOD : XForm_17<63, 32, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
+ "fcmpo $crD, $fA, $fB", IIC_FPCompare>;
+ }
}
def FTDIV: XForm_17<63, 128, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
"ftdiv $crD, $fA, $fB", IIC_FPCompare>;
def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB),
- "ftsqrt $crD, $fB", IIC_FPCompare>;
-
-let Uses = [RM], mayRaiseFPException = 1 in {
- let hasSideEffects = 0 in {
- defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiw", "$frD, $frB", IIC_FPGeneral,
- []>;
- defm FCTIWU : XForm_26r<63, 142, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiwu", "$frD, $frB", IIC_FPGeneral,
- []>;
- defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiwz", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCfctiwz f64:$frB))]>;
-
- defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB),
- "frsp", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (any_fpround f64:$frB))]>;
+ "ftsqrt $crD, $fB", IIC_FPCompare,
+ [(set i32:$crD, (PPCftsqrt f64:$fB))]>;
+let mayRaiseFPException = 1, hasSideEffects = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
"frin", "$frD, $frB", IIC_FPGeneral,
@@ -2606,9 +2693,7 @@ let Uses = [RM], mayRaiseFPException = 1 in {
defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
"frin", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (any_fround f32:$frB))]>;
- }
- let hasSideEffects = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB),
"frip", "$frD, $frB", IIC_FPGeneral,
@@ -2630,6 +2715,22 @@ let Uses = [RM], mayRaiseFPException = 1 in {
defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB),
"frim", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (any_ffloor f32:$frB))]>;
+}
+
+let Uses = [RM], mayRaiseFPException = 1, hasSideEffects = 0 in {
+ defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiw", "$frD, $frB", IIC_FPGeneral,
+ []>;
+ defm FCTIWU : XForm_26r<63, 142, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiwu", "$frD, $frB", IIC_FPGeneral,
+ []>;
+ defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiwz", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCany_fctiwz f64:$frB))]>;
+
+ defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB),
+ "frsp", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (any_fpround f64:$frB))]>;
defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB),
"fsqrt", "$frD, $frB", IIC_FPSqrtD,
@@ -2637,9 +2738,10 @@ let Uses = [RM], mayRaiseFPException = 1 in {
defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB),
"fsqrts", "$frD, $frB", IIC_FPSqrtS,
[(set f32:$frD, (any_fsqrt f32:$frB))]>;
- }
- }
}
+}
+
+def : Pat<(PPCfsqrt f64:$frA), (FSQRT $frA)>;
/// Note that FMR is defined as pseudo-ops on the PPC970 because they are
/// often coalesced away and we don't want the dispatch group builder to think
@@ -2684,6 +2786,7 @@ defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB),
[(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>;
// Reciprocal estimates.
+let mayRaiseFPException = 1 in {
defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB),
"fre", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfre f64:$frB))]>;
@@ -2697,6 +2800,7 @@ defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB),
"frsqrtes", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
}
+}
// XL-Form instructions. condition register logical ops.
//
@@ -2857,18 +2961,6 @@ let isCodeGenOnly = 1 in {
def : InstAlias<"mtvrsave $rS", (MTVRSAVE gprc:$rS)>;
def : InstAlias<"mfvrsave $rS", (MFVRSAVE gprc:$rS)>;
-// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register,
-// so we'll need to scavenge a register for it.
-let mayStore = 1 in
-def SPILL_VRSAVE : PPCEmitTimePseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
- "#SPILL_VRSAVE", []>;
-
-// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously
-// spilled), so we'll need to scavenge a register for it.
-let mayLoad = 1 in
-def RESTORE_VRSAVE : PPCEmitTimePseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
- "#RESTORE_VRSAVE", []>;
-
let hasSideEffects = 0 in {
// mtocrf's input needs to be prepared by shifting by an amount dependent
// on the cr register selected. Thus, post-ra anti-dep breaking must not
@@ -2908,20 +3000,24 @@ def : InstAlias<"mtcr $rA", (MTCRF 255, gprc:$rA)>;
let Predicates = [HasFPU] in {
// Custom inserter instruction to perform FADD in round-to-zero mode.
-let Uses = [RM] in {
+let Uses = [RM], mayRaiseFPException = 1 in {
def FADDrtz: PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "",
- [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
+ [(set f64:$FRT, (PPCany_faddrtz f64:$FRA, f64:$FRB))]>;
}
// The above pseudo gets expanded to make use of the following instructions
// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level.
-let Uses = [RM], Defs = [RM] in {
- def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
- "mtfsb0 $FM", IIC_IntMTFSB0, []>,
- PPC970_DGroup_Single, PPC970_Unit_FPU;
- def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
- "mtfsb1 $FM", IIC_IntMTFSB0, []>,
- PPC970_DGroup_Single, PPC970_Unit_FPU;
+
+// When FM is 30/31, we are setting the 62/63 bit of FPSCR, the implicit-def
+// RM should be set.
+def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
+ "mtfsb0 $FM", IIC_IntMTFSB0, []>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
+ "mtfsb1 $FM", IIC_IntMTFSB0, []>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+
+let Defs = [RM] in {
let isCodeGenOnly = 1 in
def MTFSFb : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT),
"mtfsf $FM, $rT", IIC_IntMTFSB0, []>,
@@ -3060,7 +3156,7 @@ def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC_rec gprc:$rA, gprc:$rC, gprc:$rB)>
// this type.
//
let PPC970_Unit = 3, hasSideEffects = 0, Predicates = [HasFPU] in { // FPU Operations.
-let Uses = [RM] in {
+let mayRaiseFPException = 1, Uses = [RM] in {
let isCommutable = 1 in {
defm FMADD : AForm_1r<63, 29,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
@@ -3246,9 +3342,13 @@ def : Pat<(PPCcall (i32 texternalsym:$dst)),
// Calls for AIX only
def : Pat<(PPCcall (i32 mcsym:$dst)),
(BL mcsym:$dst)>;
+
def : Pat<(PPCcall_nop (i32 mcsym:$dst)),
(BL_NOP mcsym:$dst)>;
+def : Pat<(PPCcall_nop (i32 texternalsym:$dst)),
+ (BL_NOP texternalsym:$dst)>;
+
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
@@ -3258,7 +3358,7 @@ def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
(TCRETURNri CTRRC:$dst, imm:$imm)>;
-
+def : Pat<(int_ppc_readflm), (MFFS)>;
// Hi and Lo for Darwin Global Addresses.
def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
@@ -3412,7 +3512,7 @@ def : Pat<(f64 (extloadf32 iaddr:$src)),
def : Pat<(f64 (extloadf32 xaddr:$src)),
(COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
-def : Pat<(f64 (fpextend f32:$src)),
+def : Pat<(f64 (any_fpextend f32:$src)),
(COPY_TO_REGCLASS $src, F8RC)>;
}
@@ -3452,7 +3552,6 @@ include "PPCInstrAltivec.td"
include "PPCInstrSPE.td"
include "PPCInstr64Bit.td"
include "PPCInstrVSX.td"
-include "PPCInstrQPX.td"
include "PPCInstrHTM.td"
def crnot : OutPatFrag<(ops node:$in),
@@ -3836,6 +3935,7 @@ def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)),
def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)),
(EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
+let Predicates = [IsNotISA3_1] in {
// Instantiations of CRNotPat for i32.
defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)),
(EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>;
@@ -3893,106 +3993,62 @@ defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)),
(EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>;
defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)),
(EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
+}
-let Predicates = [HasFPU] in {
-// Instantiations of CRNotPat for f32.
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
-
-// Instantiations of CRNotPat for f64.
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
-
-// Instantiations of CRNotPat for f128.
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUGE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETGE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETULE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETLE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUNE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETNE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETO)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>;
+multiclass FSetCCPat<SDNode SetCC, ValueType Ty, PatLeaf FCmp> {
+ defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+ defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+ defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+ defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+ defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUNE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+ defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+ defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETO)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>;
+
+ def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOLT)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+ def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETLT)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+ def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOGT)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+ def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETGT)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+ def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOEQ)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+ def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETEQ)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+ def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUO)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>;
}
-// SETCC for f32.
let Predicates = [HasFPU] in {
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETLT)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOGT)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETGT)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOEQ)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
+// FCMPU: If either of the operands is a Signaling NaN, then VXSNAN is set.
+// SETCC for f32.
+defm : FSetCCPat<any_fsetcc, f32, FCMPUS>;
// SETCC for f64.
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETLT)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOGT)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETGT)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOEQ)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
+defm : FSetCCPat<any_fsetcc, f64, FCMPUD>;
// SETCC for f128.
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOLT)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETLT)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOGT)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETGT)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOEQ)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETEQ)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETUO)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>;
+defm : FSetCCPat<any_fsetcc, f128, XSCMPUQP>;
+
+// FCMPO: If either of the operands is a Signaling NaN, then VXSNAN is set and,
+// if neither operand is a Signaling NaN but at least one operand is a Quiet NaN,
+// then VXVC is set.
+// SETCCS for f32.
+defm : FSetCCPat<strict_fsetccs, f32, FCMPOS>;
+
+// SETCCS for f64.
+defm : FSetCCPat<strict_fsetccs, f64, FCMPOD>;
+// SETCCS for f128.
+defm : FSetCCPat<strict_fsetccs, f128, XSCMPOQP>;
}
// This must be in this file because it relies on patterns defined in this file
@@ -4261,7 +4317,7 @@ def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),
"icbi $src", IIC_LdStICBI, []>;
-def WAIT : XForm_24_sync<31, 30, (outs), (ins i32imm:$L),
+def WAIT : XForm_24_sync<31, 30, (outs), (ins u2imm:$L),
"wait $L", IIC_LdStLoad, []>;
def MBAR : XForm_mbar<31, 854, (outs), (ins u5imm:$MO),
@@ -4279,7 +4335,7 @@ def MTSRIN: XForm_srin<31, 242, (outs), (ins gprc:$RS, gprc:$RB),
def MFSRIN: XForm_srin<31, 659, (outs gprc:$RS), (ins gprc:$RB),
"mfsrin $RS, $RB", IIC_SprMFSR>;
-def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L),
+def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, u1imm:$L),
"mtmsr $RS, $L", IIC_SprMTMSR>;
def WRTEE: XForm_mtmsr<31, 131, (outs), (ins gprc:$RS),
@@ -4308,15 +4364,17 @@ def : InstAlias<"iccci", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>;
def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins),
"mfmsr $RT", IIC_SprMFMSR, []>;
-def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L),
+def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, u1imm:$L),
"mtmsrd $RS, $L", IIC_SprMTMSRD>;
def MCRFS : XLForm_3<63, 64, (outs crrc:$BF), (ins crrc:$BFA),
"mcrfs $BF, $BFA", IIC_BrMCR>;
+// If W is 0 and BF is 7, the 60:63 bits will be set, we should set the
+// implicit-def RM.
def MTFSFI : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W),
"mtfsfi $BF, $U, $W", IIC_IntMFFS>;
-
+let Defs = [CR1] in
def MTFSFI_rec : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W),
"mtfsfi. $BF, $U, $W", IIC_IntMFFS>, isRecordForm;
@@ -4324,12 +4382,15 @@ def : InstAlias<"mtfsfi $BF, $U", (MTFSFI crrc:$BF, i32imm:$U, 0)>;
def : InstAlias<"mtfsfi. $BF, $U", (MTFSFI_rec crrc:$BF, i32imm:$U, 0)>;
let Predicates = [HasFPU] in {
+let Defs = [RM] in {
def MTFSF : XFLForm_1<63, 711, (outs),
- (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W),
+ (ins i32imm:$FLM, f8rc:$FRB, u1imm:$L, i32imm:$W),
"mtfsf $FLM, $FRB, $L, $W", IIC_IntMFFS, []>;
+let Defs = [CR1] in
def MTFSF_rec : XFLForm_1<63, 711, (outs),
- (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W),
+ (ins i32imm:$FLM, f8rc:$FRB, u1imm:$L, i32imm:$W),
"mtfsf. $FLM, $FRB, $L, $W", IIC_IntMFFS, []>, isRecordForm;
+}
def : InstAlias<"mtfsf $FLM, $FRB", (MTFSF i32imm:$FLM, f8rc:$FRB, 0, 0)>;
def : InstAlias<"mtfsf. $FLM, $FRB", (MTFSF_rec i32imm:$FLM, f8rc:$FRB, 0, 0)>;
@@ -4556,6 +4617,16 @@ def : Pat<(int_ppc_dcbfl xoaddr:$dst),
def : Pat<(int_ppc_dcbflp xoaddr:$dst),
(DCBF 3, xoaddr:$dst)>;
+let Predicates = [IsISA3_1] in {
+ def DCBFPS : PPCAsmPseudo<"dcbfps $dst", (ins memrr:$dst)>;
+ def DCBSTPS : PPCAsmPseudo<"dcbstps $dst", (ins memrr:$dst)>;
+
+ def : Pat<(int_ppc_dcbfps xoaddr:$dst),
+ (DCBF 4, xoaddr:$dst)>;
+ def : Pat<(int_ppc_dcbstps xoaddr:$dst),
+ (DCBF 6, xoaddr:$dst)>;
+}
+
def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
@@ -4582,8 +4653,11 @@ def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
def : InstAlias<"mtxer $Rx", (MTSPR 1, gprc:$Rx)>;
def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>;
+//Disable this alias on AIX for now because as does not support them.
+let Predicates = [ModernAs] in {
def : InstAlias<"mtudscr $Rx", (MTSPR 3, gprc:$Rx)>;
def : InstAlias<"mfudscr $Rx", (MFSPR gprc:$Rx, 3)>;
+}
def : InstAlias<"mfrtcu $Rx", (MFSPR gprc:$Rx, 4)>;
def : InstAlias<"mfrtcl $Rx", (MFSPR gprc:$Rx, 5)>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index 2bab73418e10..b9eb3b3b7d37 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1,3 +1,8 @@
+// Mask immediates for MMA instructions (2, 4 and 8 bits).
+def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
+def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
+def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;
+
//===----------------------------------------------------------------------===//
// PowerPC ISA 3.1 specific type constraints.
//
@@ -5,12 +10,35 @@
def SDT_PPCSplat32 : SDTypeProfile<1, 3, [ SDTCisVT<0, v2i64>,
SDTCisVec<1>, SDTCisInt<2>, SDTCisInt<3>
]>;
+def SDT_PPCAccBuild : SDTypeProfile<1, 4, [
+ SDTCisVT<0, v512i1>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>,
+ SDTCisVT<3, v4i32>, SDTCisVT<4, v4i32>
+]>;
+def SDT_PPCPairBuild : SDTypeProfile<1, 2, [
+ SDTCisVT<0, v256i1>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>
+]>;
+def SDT_PPCAccExtractVsx : SDTypeProfile<1, 2, [
+ SDTCisVT<0, v4i32>, SDTCisVT<1, v512i1>, SDTCisInt<2>
+]>;
+def SDT_PPCPairExtractVsx : SDTypeProfile<1, 2, [
+ SDTCisVT<0, v4i32>, SDTCisVT<1, v256i1>, SDTCisInt<2>
+]>;
+def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [
+ SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1>
+]>;
//===----------------------------------------------------------------------===//
// ISA 3.1 specific PPCISD nodes.
//
def PPCxxsplti32dx : SDNode<"PPCISD::XXSPLTI32DX", SDT_PPCSplat32, []>;
+def PPCAccBuild : SDNode<"PPCISD::ACC_BUILD", SDT_PPCAccBuild, []>;
+def PPCPairBuild : SDNode<"PPCISD::PAIR_BUILD", SDT_PPCPairBuild, []>;
+def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx,
+ []>;
+def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx,
+ []>;
+def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>;
//===----------------------------------------------------------------------===//
@@ -18,6 +46,15 @@ def PPCxxsplti32dx : SDNode<"PPCISD::XXSPLTI32DX", SDT_PPCSplat32, []>;
// address computations).
class isPCRel { bit PCRel = 1; }
+// PowerPC specific type constraints.
+def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
+ SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+]>;
+
+// PPC Specific DAG Nodes.
+def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
+ [SDNPHasChain, SDNPMayLoad]>;
+
// Top-level class for prefixed instructions.
class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin> : Instruction {
@@ -59,6 +96,39 @@ class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
string BaseName = "";
}
+// VX-Form: [ PO VT R VB RC XO ]
+class VXForm_VTB5_RC<bits<10> xo, bits<5> R, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VT;
+ bits<5> VB;
+ bit RC = 0;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VT;
+ let Inst{11-15} = R;
+ let Inst{16-20} = VB;
+ let Inst{21} = RC;
+ let Inst{22-31} = xo;
+}
+
+// Multiclass definition to account for record and non-record form
+// instructions of VXRForm.
+multiclass VXForm_VTB5_RCr<bits<10> xo, bits<5> R, dag OOL, dag IOL,
+ string asmbase, string asmstr,
+ InstrItinClass itin, list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : VXForm_VTB5_RC<xo, R, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)),
+ itin, pattern>, RecFormRel;
+ let Defs = [CR6] in
+ def _rec : VXForm_VTB5_RC<xo, R, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)),
+ itin, []>, isRecordForm, RecFormRel;
+ }
+}
+
class MLS_DForm_R_SI34_RTA5_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: PI<1, opcode, OOL, IOL, asmstr, itin> {
@@ -242,29 +312,37 @@ class VXForm_RD5_N3_VB5<bits<11> xo, dag OOL, dag IOL, string asmstr,
}
-// VX-Form: [PO VRT / UIM RB XO].
-// We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent
-// "/ UIM" (unused bit followed by a 4-bit immediate)
-// Destructive (insert) forms are suffixed with _ins.
-class VXForm_VRT5_UIM5_RB5_ins<bits<11> xo, string opc, list<dag> pattern>
- : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, g8rc:$rB),
- !strconcat(opc, " $vD, $rB, $UIM"), IIC_VecGeneral, pattern>,
- RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
-
// VX-Form: [PO VRT RA VRB XO].
// Destructive (insert) forms are suffixed with _ins.
class VXForm_VTB5_RA5_ins<bits<11> xo, string opc, list<dag> pattern>
- : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, vrrc:$vB),
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, gprc:$rA, vrrc:$vB),
!strconcat(opc, " $vD, $rA, $vB"), IIC_VecGeneral, pattern>,
RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
// VX-Form: [PO VRT RA RB XO].
// Destructive (insert) forms are suffixed with _ins.
class VXForm_VRT5_RAB5_ins<bits<11> xo, string opc, list<dag> pattern>
- : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB),
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, gprc:$rA, gprc:$rB),
!strconcat(opc, " $vD, $rA, $rB"), IIC_VecGeneral, pattern>,
RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+// VX-Form: [ PO BF // VRA VRB XO ]
+class VXForm_BF3_VAB5<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<5> VA;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
// VN-Form: [PO VRT VRA VRB PS SD XO]
// SD is "Shift Direction"
class VNForm_VTAB5_SD3<bits<6> xo, bits<2> ps, dag OOL, dag IOL, string asmstr,
@@ -285,6 +363,22 @@ class VNForm_VTAB5_SD3<bits<6> xo, bits<2> ps, dag OOL, dag IOL, string asmstr,
let Inst{26-31} = xo;
}
+class VXForm_RD5_MP_VB5<bits<11> xo, bits<4> eo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> RD;
+ bits<5> VB;
+ bit MP;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RD;
+ let Inst{11-14} = eo;
+ let Inst{15} = MP;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
// 8RR:D-Form: [ 1 1 0 // // imm0
// PO T XO TX imm1 ].
class 8RR_DForm_IMM32_XT6<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
@@ -415,6 +509,13 @@ class XX2_BF3_XO5_XB6_XO9<bits<6> opcode, bits<5> xo2, bits<9> xo, dag OOL,
let Inst{31} = 0;
}
+// X-Form: [ PO RT BI /// XO / ]
+class XForm_XT5_BI5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let B = 0;
+}
+
multiclass MLS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL,
dag PCRel_IOL, string asmstr,
InstrItinClass itin> {
@@ -444,14 +545,307 @@ multiclass 8LS_DForm_R_SI34_XT6_RA5_p<bits<5> opcode, dag OOL, dag IOL,
isPCRel;
}
+def PPCRegVSRpRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSRpRC"; let PredicateMethod = "isVSRpEvenRegNumber";
+}
+
+def vsrprc : RegisterOperand<VSRpRC> {
+ let ParserMatchClass = PPCRegVSRpRCAsmOperand;
+}
+
+def PPCRegVSRpEvenRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSRpEvenRC"; let PredicateMethod = "isVSRpEvenRegNumber";
+}
+
+def vsrpevenrc : RegisterOperand<VSRpRC> {
+ let ParserMatchClass = PPCRegVSRpEvenRCAsmOperand;
+ let EncoderMethod = "getVSRpEvenEncoding";
+ let DecoderMethod = "decodeVSRpEvenOperands";
+}
+
+class DQForm_XTp5_RA17_MEM<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> XTp;
+ bits<17> DQ_RA;
+ let Pattern = pattern;
+
+ let Inst{6-9} = XTp{3-0};
+ let Inst{10} = XTp{4};
+ let Inst{11-15} = DQ_RA{16-12}; // Register #
+ let Inst{16-27} = DQ_RA{11-0}; // Displacement.
+ let Inst{28-31} = xo;
+}
+
+class XForm_XTp5_XAB5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin>, XFormMemOp {
+ bits<5> XTp;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+ let Inst{6-9} = XTp{3-0};
+ let Inst{10} = XTp{4};
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class 8LS_DForm_R_XTp5_SI34_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<5> XTp;
+ bits<39> D_RA;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-10} = 0;
+ let Inst{11} = PCRel;
+ let Inst{12-13} = 0;
+ let Inst{14-31} = D_RA{33-16}; // Imm18
+
+ // The instruction.
+ let Inst{38-41} = XTp{3-0};
+ let Inst{42} = XTp{4};
+ let Inst{43-47} = D_RA{38-34}; // Register #
+ let Inst{48-63} = D_RA{15-0}; // D
+}
+
+multiclass 8LS_DForm_R_XTp5_SI34_MEM_p<bits<6> pref, bits<6> opcode, dag OOL,
+ dag IOL, dag PCRel_IOL,
+ string asmstr, InstrItinClass itin> {
+ def NAME : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, IOL,
+ !strconcat(asmstr, ", 0"), itin, []>;
+ def pc : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, PCRel_IOL,
+ !strconcat(asmstr, ", 1"), itin, []>,
+ isPCRel;
+}
+
+def PPCRegACCRCAsmOperand : AsmOperandClass {
+ let Name = "RegACCRC"; let PredicateMethod = "isACCRegNumber";
+}
+
+def acc : RegisterOperand<ACCRC> {
+ let ParserMatchClass = PPCRegACCRCAsmOperand;
+}
+
+def uacc : RegisterOperand<UACCRC> {
+ let ParserMatchClass = PPCRegACCRCAsmOperand;
+}
+
+// [PO AS XO2 XO]
+class XForm_AT3<bits<6> opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> AT;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = AT;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = xo2;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XX3Form_AT3_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> AT;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = AT;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = 0;
+}
+
+class MMIRR_XX3Form_XY4P2_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<3> AT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<4> XMSK;
+ bits<4> YMSK;
+ bits<2> PMSK;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 3;
+ let Inst{8-11} = 9;
+ let Inst{12-15} = 0;
+ let Inst{16-17} = PMSK;
+ let Inst{18-23} = 0;
+ let Inst{24-27} = XMSK;
+ let Inst{28-31} = YMSK;
+
+ // The instruction.
+ let Inst{38-40} = AT;
+ let Inst{41-42} = 0;
+ let Inst{43-47} = XA{4-0};
+ let Inst{48-52} = XB{4-0};
+ let Inst{53-60} = xo;
+ let Inst{61} = XA{5};
+ let Inst{62} = XB{5};
+ let Inst{63} = 0;
+}
+
+class MMIRR_XX3Form_XY4_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<3> AT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<4> XMSK;
+ bits<4> YMSK;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 3;
+ let Inst{8-11} = 9;
+ let Inst{12-23} = 0;
+ let Inst{24-27} = XMSK;
+ let Inst{28-31} = YMSK;
+
+ // The instruction.
+ let Inst{38-40} = AT;
+ let Inst{41-42} = 0;
+ let Inst{43-47} = XA{4-0};
+ let Inst{48-52} = XB{4-0};
+ let Inst{53-60} = xo;
+ let Inst{61} = XA{5};
+ let Inst{62} = XB{5};
+ let Inst{63} = 0;
+}
+
+class MMIRR_XX3Form_X4Y2_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<3> AT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<4> XMSK;
+ bits<2> YMSK;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 3;
+ let Inst{8-11} = 9;
+ let Inst{12-23} = 0;
+ let Inst{24-27} = XMSK;
+ let Inst{28-29} = YMSK;
+ let Inst{30-31} = 0;
+
+ // The instruction.
+ let Inst{38-40} = AT;
+ let Inst{41-42} = 0;
+ let Inst{43-47} = XA{4-0};
+ let Inst{48-52} = XB{4-0};
+ let Inst{53-60} = xo;
+ let Inst{61} = XA{5};
+ let Inst{62} = XB{5};
+ let Inst{63} = 0;
+}
+
+class MMIRR_XX3Form_XY4P8_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<3> AT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<4> XMSK;
+ bits<4> YMSK;
+ bits<8> PMSK;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 3;
+ let Inst{8-11} = 9;
+ let Inst{12-15} = 0;
+ let Inst{16-23} = PMSK;
+ let Inst{24-27} = XMSK;
+ let Inst{28-31} = YMSK;
+
+ // The instruction.
+ let Inst{38-40} = AT;
+ let Inst{41-42} = 0;
+ let Inst{43-47} = XA{4-0};
+ let Inst{48-52} = XB{4-0};
+ let Inst{53-60} = xo;
+ let Inst{61} = XA{5};
+ let Inst{62} = XB{5};
+ let Inst{63} = 0;
+}
+
+class MMIRR_XX3Form_XYP4_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<3> AT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<4> XMSK;
+ bits<4> YMSK;
+ bits<4> PMSK;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 3;
+ let Inst{8-11} = 9;
+ let Inst{12-15} = 0;
+ let Inst{16-19} = PMSK;
+ let Inst{20-23} = 0;
+ let Inst{24-27} = XMSK;
+ let Inst{28-31} = YMSK;
+
+ // The instruction.
+ let Inst{38-40} = AT;
+ let Inst{41-42} = 0;
+ let Inst{43-47} = XA{4-0};
+ let Inst{48-52} = XB{4-0};
+ let Inst{53-60} = xo;
+ let Inst{61} = XA{5};
+ let Inst{62} = XB{5};
+ let Inst{63} = 0;
+}
+
def PrefixInstrs : Predicate<"Subtarget->hasPrefixInstrs()">;
def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">;
+def PairedVectorMemops : Predicate<"Subtarget->pairedVectorMemops()">;
+def MMA : Predicate<"Subtarget->hasMMA()">;
+
+def RCCp {
+ dag AToVSRC = (COPY_TO_REGCLASS $XA, VSRC);
+ dag BToVSRC = (COPY_TO_REGCLASS $XB, VSRC);
+}
let Predicates = [PrefixInstrs] in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
defm PADDI8 :
MLS_DForm_R_SI34_RTA5_p<14, (outs g8rc:$RT), (ins g8rc:$RA, s34imm:$SI),
- (ins immZero:$RA, s34imm:$SI),
+ (ins immZero:$RA, s34imm_pcrel:$SI),
"paddi $RT, $RA, $SI", IIC_LdStLFD>;
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def PLI8 : MLS_DForm_SI34_RT5<14, (outs g8rc:$RT),
@@ -461,7 +855,7 @@ let Predicates = [PrefixInstrs] in {
}
defm PADDI :
MLS_DForm_R_SI34_RTA5_p<14, (outs gprc:$RT), (ins gprc:$RA, s34imm:$SI),
- (ins immZero:$RA, s34imm:$SI),
+ (ins immZero:$RA, s34imm_pcrel:$SI),
"paddi $RT, $RA, $SI", IIC_LdStLFD>;
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def PLI : MLS_DForm_SI34_RT5<14, (outs gprc:$RT),
@@ -592,6 +986,695 @@ let Predicates = [PrefixInstrs] in {
}
}
+// Multiclass definitions for MMA accumulator instructions.
+// ----------------------------------------------------------------------------
+
+// Defines 2 unmasked instructions where the xo field for acc/non-acc version
+// is even/odd.
+multiclass ACC_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+ string asmstr> {
+ let Predicates = [MMA] in {
+ def NAME :
+ XX3Form_AT3_XAB6<opcode, !or(xo, 0x01), (outs acc:$AT), IOL,
+ !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PP :
+ XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 4 instructions, masked/unmasked with masks 8, 4, 4 bits.
+// The XO field for acc/non-acc version is even/odd.
+multiclass ACC_UM_M844_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+ string asmstr> {
+ defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_XY4P8_XAB6<
+ opcode, !or(xo, 0x01), (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_XY4P8_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con((ins acc:$ATi),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 4 instructions, masked/unmasked with masks 4, 4, 4 bits.
+// The XO field for acc/non-acc version is even/odd.
+multiclass ACC_UM_M444_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+ string asmstr> {
+ defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_XYP4_XAB6<
+ opcode, !or(xo, 0x01), (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_XYP4_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con((ins acc:$ATi),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
+// The XO field for acc/non-acc version is even/odd.
+multiclass ACC_UM_M244_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+ string asmstr> {
+ defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, !or(xo, 0x01), (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
+// Upper nibble of XO field for acc/non-acc version is 0x4/0x6.
+multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+ string asmstr> {
+ let Predicates = [MMA] in {
+ def NAME :
+ XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), IOL,
+ !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PP :
+ XX3Form_AT3_XAB6<
+ opcode, !or(xo, 0x20), (outs acc:$AT), !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, !or(xo, 0x20), (outs acc:$AT),
+ !con((ins acc:$ATi),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 10 instructions, operand negating, unmasked, masked with 2, 4, 4
+// bits. Upper nibble are masked with 0x8, 0x4, 0xC for negating operands.
+multiclass ACC_NEG_UM_M244_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
+ string asmbase, string asmstr> {
+ defm NAME : ACC_UM_M244_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA] in {
+ def PN : XX3Form_AT3_XAB6<
+ opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def NP : XX3Form_AT3_XAB6<
+ opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def NN : XX3Form_AT3_XAB6<
+ opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME#PN :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, !or(xo, 0x80), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NP :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, !or(xo, 0x40), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NN :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, !or(xo, 0xC0), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 5 instructions, unmasked, operand negating.
+// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands.
+multiclass ACC_NEG_UM_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
+ string asmbase, string asmstr> {
+ defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA] in {
+ def PN : XX3Form_AT3_XAB6<opcode, !or(xo, 0x80), (outs acc:$AT),
+ !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def NP : XX3Form_AT3_XAB6<opcode, !or(xo, 0x40), (outs acc:$AT),
+ !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def NN : XX3Form_AT3_XAB6<opcode, !or(xo, 0xC0), (outs acc:$AT),
+ !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 10 instructions, operand negating, unmasked, masked with 4, 4 bits.
+// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands.
+multiclass ACC_NEG_UM_M44_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
+ string asmbase, string asmstr> {
+ defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_XY4_XAB6<
+ opcode, !or(xo, 0x01), (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_XY4_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#PN :
+ MMIRR_XX3Form_XY4_XAB6<
+ opcode, !or(xo, 0x80), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+ !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NP :
+ MMIRR_XX3Form_XY4_XAB6<
+ opcode, !or(xo, 0x40), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+ !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NN :
+ MMIRR_XX3Form_XY4_XAB6<
+ opcode, !or(xo, 0xC0), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+ !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 10 instructions, operand negating, unmasked, masked with 4, 2 bits.
+// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands.
+multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
+ string asmbase, string asmstr> {
+ defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_X4Y2_XAB6<
+ opcode, !or(xo, 0x01), (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_X4Y2_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#PN :
+ MMIRR_XX3Form_X4Y2_XAB6<
+ opcode, !or(xo, 0x80), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+ !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NP :
+ MMIRR_XX3Form_X4Y2_XAB6<
+ opcode, !or(xo, 0x40), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+ !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NN :
+ MMIRR_XX3Form_X4Y2_XAB6<
+ opcode, !or(xo, 0xC0), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+ !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// End of class definitions.
+//-----------------------------------------------------------------------------
+
+let Predicates = [MMA] in {
+ def XXMFACC :
+ XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS",
+ IIC_VecGeneral,
+ [(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>,
+ RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">;
+ def XXMTACC :
+ XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT",
+ IIC_VecGeneral,
+ [(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp),
+ "#KILL_PAIR", []>,
+ RegConstraint<"$XTp = $XSp">;
+ def BUILD_UACC : PPCPostRAExpPseudo<(outs acc:$AT), (ins uacc:$AS),
+ "#BUILD_UACC $AT, $AS", []>;
+ // We define XXSETACCZ as rematerializable to undo CSE of that intrinsic in
+ // the backend. We avoid CSE here because it generates a copy of the acc
+ // register and this copy is more expensive than calling the intrinsic again.
+ let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
+ def XXSETACCZ :
+ XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral,
+ [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>;
+ }
+ def XVI8GER4SPP :
+ XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB),
+ "xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ let mayStore = 1 in {
+ def SPILL_ACC: PPCEmitTimePseudo<(outs), (ins acc:$AT, memrix16:$dst),
+ "#SPILL_ACC", []>;
+ def SPILL_UACC: PPCEmitTimePseudo<(outs), (ins uacc:$AT, memrix16:$dst),
+ "#SPILL_UACC", []>;
+ }
+ let mayLoad = 1, hasSideEffects = 0 in {
+ def RESTORE_ACC: PPCEmitTimePseudo<(outs acc:$AT), (ins memrix16:$src),
+ "#RESTORE_ACC", []>;
+ def RESTORE_UACC: PPCEmitTimePseudo<(outs uacc:$AT), (ins memrix16:$src),
+ "#RESTORE_UACC", []>;
+ }
+}
+
+let Predicates = [MMA, PrefixInstrs] in {
+ def PMXVI8GER4SPP :
+ MMIRR_XX3Form_XYP4_XAB6<59, 99, (outs acc:$AT),
+ (ins acc:$ATi, vsrc:$XA,vsrc:$XB, u4imm:$XMSK,
+ u4imm:$YMSK, u4imm:$PMSK),
+ "pmxvi8ger4spp $AT, $XA, $XB, $XMSK, $YMSK, $PMSK",
+ IIC_VecGeneral, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+}
+
+// MMA accumulating/non-accumulating instructions.
+//------------------------------------------------------------------------------
+
+// XVBF16GER2, XVBF16GER2PP, XVBF16GER2PN, XVBF16GER2NP, XVBF16GER2NN
+// PMXVBF16GER2, PMXVBF16GER2PP, PMXVBF16GER2PN, PMXVBF16GER2NP, PMXVBF16GER2NN
+defm XVBF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 50, (ins vsrc:$XA, vsrc:$XB),
+ "xvbf16ger2", "$AT, $XA, $XB">;
+
+// XVI4GER8, XVI4GER8PP, PMXVI4GER8, PMXVI4GER8PP
+defm XVI4GER8 : ACC_UM_M844_XOEO<59, 34, (ins vsrc:$XA, vsrc:$XB),
+ "xvi4ger8", "$AT, $XA, $XB">;
+
+// XVI8GER4, XVI8GER4PP, PMXVI8GER4, PMXVI8GER4PP
+defm XVI8GER4 : ACC_UM_M444_XOEO<59, 2, (ins vsrc:$XA, vsrc:$XB),
+ "xvi8ger4", "$AT, $XA, $XB">;
+
+// XVI16GER2, XVI16GER2PP, PMXVI16GER2, PMXVI16GER2PP
+defm XVI16GER2 : ACC_UM_M244_XO46<59, 75, (ins vsrc:$XA, vsrc:$XB),
+ "xvi16ger2", "$AT, $XA, $XB">;
+
+// XVI16GER2S, XVI16GER2SPP, PMXVI16GER2S, PMXVI16GER2SPP
+defm XVI16GER2S : ACC_UM_M244_XOEO<59, 42, (ins vsrc:$XA, vsrc:$XB),
+ "xvi16ger2s", "$AT, $XA, $XB">;
+
+// XVF16GER2, XVF16GER2PP, XVF16GER2PN, XVF16GER2NP, XVF16GER2NN
+// PMXVF16GER2, PMXVF16GER2PP, PMXVF16GER2PN, PMXVF16GER2NP, PMXVF16GER2NN
+defm XVF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 18, (ins vsrc:$XA, vsrc:$XB),
+ "xvf16ger2", "$AT, $XA, $XB">;
+
+// XVF32GER, XVF32GERPP, XVF32GERPN, XVF32GERNP, XVF32GERPP
+// PMXVF32GER, PMXVF32GERPP, PMXVF32GERPN, PMXVF32GERNP, PMXVF32GERPP
+defm XVF32GER : ACC_NEG_UM_M44_XOM84C<59, 26, (ins vsrc:$XA, vsrc:$XB),
+ "xvf32ger", "$AT, $XA, $XB">;
+
+// XVF64GER, XVF64GERPP, XVF64GERPN, XVF64GERNP, XVF64GERNN
+// PMXVF64GER, PMXVF64GERPP, PMXVF64GERPN, PMXVF64GERNP, PMXVF64GERNN
+defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB),
+ "xvf64ger", "$AT, $XA, $XB">;
+//------------------------------------------------------------------------------
+
+// MMA Intrinsics
+let Predicates = [MMA] in {
+ def : Pat<(v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)),
+ (XVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_xvi8ger4 v16i8:$XA, v16i8:$XB)),
+ (XVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_xvi16ger2s v16i8:$XA, v16i8:$XB)),
+ (XVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)),
+ (XVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)),
+ (XVF32GER RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf64ger v256i1:$XA, v16i8:$XB)),
+ (XVF64GER $XA, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
+ (XVF64GERPP $ATi, $XA, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
+ (XVF64GERPN $ATi, $XA, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
+ (XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
+ (XVF64GERNN $ATi, $XA, RCCp.BToVSRC)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2 v16i8:$XA, v16i8:$XB)),
+ (XVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi16ger2 v16i8:$XA, v16i8:$XB)),
+ (XVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+}
+
+// MMA Intrinsics
+let Predicates = [MMA, PrefixInstrs] in {
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk8Imm:$PMSK)),
+ (PMXVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk8Imm:$PMSK)),
+ (PMXVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk4Imm:$PMSK)),
+ (PMXVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk4Imm:$PMSK)),
+ (PMXVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2s v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
+ (PMXVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
+ (PMXVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf32ger v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)),
+ (PMXVF32GER RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
+ (PMXVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
+ (PMXVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
+ (PMXVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
+ (PMXVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf64ger v256i1:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk2Imm:$YMSK)),
+ (PMXVF64GER $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, Msk2Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
+ (PMXVF64GERPP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk2Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
+ (PMXVF64GERPN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk2Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
+ (PMXVF64GERNP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk2Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
+ (PMXVF64GERNN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk2Imm:$YMSK)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
+ (PMXVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
+ (PMXVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+}
+
+def Concats {
+ dag VecsToVecPair0 =
+ (v256i1 (INSERT_SUBREG
+ (INSERT_SUBREG (IMPLICIT_DEF), $vs0, sub_vsx1),
+ $vs1, sub_vsx0));
+ dag VecsToVecPair1 =
+ (v256i1 (INSERT_SUBREG
+ (INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1),
+ $vs3, sub_vsx0));
+ dag VecsToVecQuad =
+ (BUILD_UACC (INSERT_SUBREG
+ (INSERT_SUBREG (v512i1 (IMPLICIT_DEF)),
+ (KILL_PAIR VecsToVecPair0), sub_pair0),
+ (KILL_PAIR VecsToVecPair1), sub_pair1));
+}
+
+def Extracts {
+ dag Pair0 = (v256i1 (EXTRACT_SUBREG $v, sub_pair0));
+ dag Pair1 = (v256i1 (EXTRACT_SUBREG $v, sub_pair1));
+ dag Vec0 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx0));
+ dag Vec1 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx1));
+ dag Vec2 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx0));
+ dag Vec3 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx1));
+}
+
+let Predicates = [MMA] in {
+ def : Pat<(v512i1 (PPCAccBuild v4i32:$vs1, v4i32:$vs0, v4i32:$vs3, v4i32:$vs2)),
+ (XXMTACC Concats.VecsToVecQuad)>;
+ def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0,
+ v16i8:$vs3, v16i8:$vs2)),
+ (XXMTACC Concats.VecsToVecQuad)>;
+ def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>;
+ def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 0))),
+ Extracts.Vec0>;
+ def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 1))),
+ Extracts.Vec1>;
+ def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 2))),
+ Extracts.Vec2>;
+ def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 3))),
+ Extracts.Vec3>;
+}
+
+let Predicates = [PairedVectorMemops] in {
+ def : Pat<(v256i1 (PPCPairBuild v4i32:$vs1, v4i32:$vs0)),
+ Concats.VecsToVecPair0>;
+ def : Pat<(v256i1 (int_ppc_vsx_assemble_pair v16i8:$vs1, v16i8:$vs0)),
+ Concats.VecsToVecPair0>;
+ def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, (i64 0))),
+ (v4i32 (EXTRACT_SUBREG $v, sub_vsx0))>;
+ def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, (i64 1))),
+ (v4i32 (EXTRACT_SUBREG $v, sub_vsx1))>;
+}
+
+let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops] in {
+ def LXVP : DQForm_XTp5_RA17_MEM<6, 0, (outs vsrprc:$XTp),
+ (ins memrix16:$DQ_RA), "lxvp $XTp, $DQ_RA",
+ IIC_LdStLFD, []>;
+ def LXVPX : XForm_XTp5_XAB5<31, 333, (outs vsrprc:$XTp), (ins memrr:$src),
+ "lxvpx $XTp, $src", IIC_LdStLFD,
+ []>;
+}
+
+let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops] in {
+ def STXVP : DQForm_XTp5_RA17_MEM<6, 1, (outs), (ins vsrprc:$XTp,
+ memrix16:$DQ_RA), "stxvp $XTp, $DQ_RA",
+ IIC_LdStLFD, []>;
+ def STXVPX : XForm_XTp5_XAB5<31, 461, (outs), (ins vsrprc:$XTp, memrr:$dst),
+ "stxvpx $XTp, $dst", IIC_LdStLFD,
+ []>;
+}
+
+let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops, PrefixInstrs] in {
+ defm PLXVP :
+ 8LS_DForm_R_XTp5_SI34_MEM_p<1, 58, (outs vsrprc:$XTp), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plxvp $XTp, $D_RA",
+ IIC_LdStLFD>;
+}
+
+let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops, PrefixInstrs] in {
+ defm PSTXVP :
+ 8LS_DForm_R_XTp5_SI34_MEM_p<1, 62, (outs), (ins vsrprc:$XTp, memri34:$D_RA),
+ (ins vsrprc:$XTp, memri34_pcrel:$D_RA),
+ "pstxvp $XTp, $D_RA", IIC_LdStLFD>;
+}
+
+let Predicates = [PairedVectorMemops] in {
+ // Intrinsics for Paired Vector Loads.
+ def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX16:$src)), (LXVP memrix16:$src)>;
+ def : Pat<(v256i1 (int_ppc_vsx_lxvp xaddrX16:$src)), (LXVPX xaddrX16:$src)>;
+ let Predicates = [PairedVectorMemops, PrefixInstrs] in {
+ def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX34:$src)), (PLXVP memri34:$src)>;
+ }
+ // Intrinsics for Paired Vector Stores.
+ def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX16:$dst),
+ (STXVP $XSp, memrix16:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, xaddrX16:$dst),
+ (STXVPX $XSp, xaddrX16:$dst)>;
+ let Predicates = [PairedVectorMemops, PrefixInstrs] in {
+ def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX34:$dst),
+ (PSTXVP $XSp, memri34:$dst)>;
+ }
+}
+
// TODO: We have an added complexity of 500 here. This is only a temporary
// solution to have tablegen consider these patterns first. The way we do
// addressing for PowerPC is complex depending on available D form, X form, or
@@ -753,6 +1836,13 @@ let Predicates = [PCRelativeMemops], AddedComplexity = 500 in {
// If the PPCmatpcreladdr node is not caught by any other pattern it should be
// caught here and turned into a paddi instruction to materialize the address.
def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
+ // PPCtlsdynamatpcreladdr node is used for TLS dynamic models to materialize
+ // tls global address with paddi instruction.
+ def : Pat<(PPCtlsdynamatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
+ // PPCtlslocalexecmataddr node is used for TLS local exec models to
+ // materialize tls global address with paddi instruction.
+ def : Pat<(PPCaddTls i64:$in, (PPCtlslocalexecmataddr tglobaltlsaddr:$addr)),
+ (PADDI8 $in, $addr)>;
}
let Predicates = [PrefixInstrs] in {
@@ -797,6 +1887,26 @@ let Predicates = [PrefixInstrs] in {
}
let Predicates = [IsISA3_1] in {
+ def SETBC : XForm_XT5_BI5<31, 384, (outs gprc:$RT), (ins crbitrc:$BI),
+ "setbc $RT, $BI", IIC_IntCompare, []>;
+ def SETBCR : XForm_XT5_BI5<31, 416, (outs gprc:$RT), (ins crbitrc:$BI),
+ "setbcr $RT, $BI", IIC_IntCompare, []>;
+ def SETNBC : XForm_XT5_BI5<31, 448, (outs gprc:$RT), (ins crbitrc:$BI),
+ "setnbc $RT, $BI", IIC_IntCompare, []>;
+ def SETNBCR : XForm_XT5_BI5<31, 480, (outs gprc:$RT), (ins crbitrc:$BI),
+ "setnbcr $RT, $BI", IIC_IntCompare, []>;
+
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ def SETBC8 : XForm_XT5_BI5<31, 384, (outs g8rc:$RT), (ins crbitrc:$BI),
+ "setbc $RT, $BI", IIC_IntCompare, []>;
+ def SETBCR8 : XForm_XT5_BI5<31, 416, (outs g8rc:$RT), (ins crbitrc:$BI),
+ "setbcr $RT, $BI", IIC_IntCompare, []>;
+ def SETNBC8 : XForm_XT5_BI5<31, 448, (outs g8rc:$RT), (ins crbitrc:$BI),
+ "setnbc $RT, $BI", IIC_IntCompare, []>;
+ def SETNBCR8 : XForm_XT5_BI5<31, 480, (outs g8rc:$RT), (ins crbitrc:$BI),
+ "setnbcr $RT, $BI", IIC_IntCompare, []>;
+ }
+
def VSLDBI : VNForm_VTAB5_SD3<22, 0, (outs vrrc:$VRT),
(ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH),
"vsldbi $VRT, $VRA, $VRB, $SH",
@@ -813,87 +1923,254 @@ let Predicates = [IsISA3_1] in {
(int_ppc_altivec_vsrdbi v16i8:$VRA,
v16i8:$VRB,
i32:$SH))]>;
- def VINSW :
- VXForm_VRT5_UIM5_RB5_ins<207, "vinsw",
- [(set v4i32:$vD,
- (int_ppc_altivec_vinsw v4i32:$vDi, i64:$rB,
- timm:$UIM))]>;
+ defm VSTRIBR : VXForm_VTB5_RCr<13, 1, (outs vrrc:$vT), (ins vrrc:$vB),
+ "vstribr", "$vT, $vB", IIC_VecGeneral,
+ [(set v16i8:$vT,
+ (int_ppc_altivec_vstribr v16i8:$vB))]>;
+ defm VSTRIBL : VXForm_VTB5_RCr<13, 0, (outs vrrc:$vT), (ins vrrc:$vB),
+ "vstribl", "$vT, $vB", IIC_VecGeneral,
+ [(set v16i8:$vT,
+ (int_ppc_altivec_vstribl v16i8:$vB))]>;
+ defm VSTRIHR : VXForm_VTB5_RCr<13, 3, (outs vrrc:$vT), (ins vrrc:$vB),
+ "vstrihr", "$vT, $vB", IIC_VecGeneral,
+ [(set v8i16:$vT,
+ (int_ppc_altivec_vstrihr v8i16:$vB))]>;
+ defm VSTRIHL : VXForm_VTB5_RCr<13, 2, (outs vrrc:$vT), (ins vrrc:$vB),
+ "vstrihl", "$vT, $vB", IIC_VecGeneral,
+ [(set v8i16:$vT,
+ (int_ppc_altivec_vstrihl v8i16:$vB))]>;
+ def VINSW :
+ VXForm_1<207, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, gprc:$rB),
+ "vinsw $vD, $rB, $UIM", IIC_VecGeneral,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vinsw v4i32:$vDi, i32:$rB, timm:$UIM))]>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
def VINSD :
- VXForm_VRT5_UIM5_RB5_ins<463, "vinsd",
- [(set v2i64:$vD,
- (int_ppc_altivec_vinsd v2i64:$vDi, i64:$rB,
- timm:$UIM))]>;
+ VXForm_1<463, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, g8rc:$rB),
+ "vinsd $vD, $rB, $UIM", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vinsd v2i64:$vDi, i64:$rB, timm:$UIM))]>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
def VINSBVLX :
VXForm_VTB5_RA5_ins<15, "vinsbvlx",
[(set v16i8:$vD,
- (int_ppc_altivec_vinsbvlx v16i8:$vDi, i64:$rA,
+ (int_ppc_altivec_vinsbvlx v16i8:$vDi, i32:$rA,
v16i8:$vB))]>;
def VINSBVRX :
VXForm_VTB5_RA5_ins<271, "vinsbvrx",
[(set v16i8:$vD,
- (int_ppc_altivec_vinsbvrx v16i8:$vDi, i64:$rA,
+ (int_ppc_altivec_vinsbvrx v16i8:$vDi, i32:$rA,
v16i8:$vB))]>;
def VINSHVLX :
VXForm_VTB5_RA5_ins<79, "vinshvlx",
[(set v8i16:$vD,
- (int_ppc_altivec_vinshvlx v8i16:$vDi, i64:$rA,
+ (int_ppc_altivec_vinshvlx v8i16:$vDi, i32:$rA,
v8i16:$vB))]>;
def VINSHVRX :
VXForm_VTB5_RA5_ins<335, "vinshvrx",
[(set v8i16:$vD,
- (int_ppc_altivec_vinshvrx v8i16:$vDi, i64:$rA,
+ (int_ppc_altivec_vinshvrx v8i16:$vDi, i32:$rA,
v8i16:$vB))]>;
def VINSWVLX :
VXForm_VTB5_RA5_ins<143, "vinswvlx",
[(set v4i32:$vD,
- (int_ppc_altivec_vinswvlx v4i32:$vDi, i64:$rA,
+ (int_ppc_altivec_vinswvlx v4i32:$vDi, i32:$rA,
v4i32:$vB))]>;
def VINSWVRX :
VXForm_VTB5_RA5_ins<399, "vinswvrx",
[(set v4i32:$vD,
- (int_ppc_altivec_vinswvrx v4i32:$vDi, i64:$rA,
+ (int_ppc_altivec_vinswvrx v4i32:$vDi, i32:$rA,
v4i32:$vB))]>;
def VINSBLX :
VXForm_VRT5_RAB5_ins<527, "vinsblx",
[(set v16i8:$vD,
- (int_ppc_altivec_vinsblx v16i8:$vDi, i64:$rA,
- i64:$rB))]>;
+ (int_ppc_altivec_vinsblx v16i8:$vDi, i32:$rA,
+ i32:$rB))]>;
def VINSBRX :
VXForm_VRT5_RAB5_ins<783, "vinsbrx",
[(set v16i8:$vD,
- (int_ppc_altivec_vinsbrx v16i8:$vDi, i64:$rA,
- i64:$rB))]>;
+ (int_ppc_altivec_vinsbrx v16i8:$vDi, i32:$rA,
+ i32:$rB))]>;
def VINSHLX :
VXForm_VRT5_RAB5_ins<591, "vinshlx",
[(set v8i16:$vD,
- (int_ppc_altivec_vinshlx v8i16:$vDi, i64:$rA,
- i64:$rB))]>;
+ (int_ppc_altivec_vinshlx v8i16:$vDi, i32:$rA,
+ i32:$rB))]>;
def VINSHRX :
VXForm_VRT5_RAB5_ins<847, "vinshrx",
[(set v8i16:$vD,
- (int_ppc_altivec_vinshrx v8i16:$vDi, i64:$rA,
- i64:$rB))]>;
+ (int_ppc_altivec_vinshrx v8i16:$vDi, i32:$rA,
+ i32:$rB))]>;
def VINSWLX :
VXForm_VRT5_RAB5_ins<655, "vinswlx",
[(set v4i32:$vD,
- (int_ppc_altivec_vinswlx v4i32:$vDi, i64:$rA,
- i64:$rB))]>;
+ (int_ppc_altivec_vinswlx v4i32:$vDi, i32:$rA,
+ i32:$rB))]>;
def VINSWRX :
VXForm_VRT5_RAB5_ins<911, "vinswrx",
[(set v4i32:$vD,
- (int_ppc_altivec_vinswrx v4i32:$vDi, i64:$rA,
- i64:$rB))]>;
+ (int_ppc_altivec_vinswrx v4i32:$vDi, i32:$rA,
+ i32:$rB))]>;
def VINSDLX :
- VXForm_VRT5_RAB5_ins<719, "vinsdlx",
- [(set v2i64:$vD,
- (int_ppc_altivec_vinsdlx v2i64:$vDi, i64:$rA,
- i64:$rB))]>;
+ VXForm_1<719, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB),
+ "vinsdlx $vD, $rA, $rB", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vinsdlx v2i64:$vDi, i64:$rA, i64:$rB))]>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
def VINSDRX :
- VXForm_VRT5_RAB5_ins<975, "vinsdrx",
- [(set v2i64:$vD,
- (int_ppc_altivec_vinsdrx v2i64:$vDi, i64:$rA,
- i64:$rB))]>;
-
+ VXForm_1<975, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB),
+ "vinsdrx $vD, $rA, $rB", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vinsdrx v2i64:$vDi, i64:$rA, i64:$rB))]>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+ def VEXTRACTBM : VXForm_RD5_XO5_RS5<1602, 8, (outs gprc:$rD), (ins vrrc:$vB),
+ "vextractbm $rD, $vB", IIC_VecGeneral,
+ [(set i32:$rD,
+ (int_ppc_altivec_vextractbm v16i8:$vB))]>;
+ def VEXTRACTHM : VXForm_RD5_XO5_RS5<1602, 9, (outs gprc:$rD), (ins vrrc:$vB),
+ "vextracthm $rD, $vB", IIC_VecGeneral,
+ [(set i32:$rD,
+ (int_ppc_altivec_vextracthm v8i16:$vB))]>;
+ def VEXTRACTWM : VXForm_RD5_XO5_RS5<1602, 10, (outs gprc:$rD), (ins vrrc:$vB),
+ "vextractwm $rD, $vB", IIC_VecGeneral,
+ [(set i32:$rD,
+ (int_ppc_altivec_vextractwm v4i32:$vB))]>;
+ def VEXTRACTDM : VXForm_RD5_XO5_RS5<1602, 11, (outs gprc:$rD), (ins vrrc:$vB),
+ "vextractdm $rD, $vB", IIC_VecGeneral,
+ [(set i32:$rD,
+ (int_ppc_altivec_vextractdm v2i64:$vB))]>;
+ def VEXTRACTQM : VXForm_RD5_XO5_RS5<1602, 12, (outs gprc:$rD), (ins vrrc:$vB),
+ "vextractqm $rD, $vB", IIC_VecGeneral,
+ [(set i32:$rD,
+ (int_ppc_altivec_vextractqm v1i128:$vB))]>;
+ def VEXPANDBM : VXForm_RD5_XO5_RS5<1602, 0, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vexpandbm $vD, $vB", IIC_VecGeneral,
+ [(set v16i8:$vD, (int_ppc_altivec_vexpandbm
+ v16i8:$vB))]>;
+ def VEXPANDHM : VXForm_RD5_XO5_RS5<1602, 1, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vexpandhm $vD, $vB", IIC_VecGeneral,
+ [(set v8i16:$vD, (int_ppc_altivec_vexpandhm
+ v8i16:$vB))]>;
+ def VEXPANDWM : VXForm_RD5_XO5_RS5<1602, 2, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vexpandwm $vD, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (int_ppc_altivec_vexpandwm
+ v4i32:$vB))]>;
+ def VEXPANDDM : VXForm_RD5_XO5_RS5<1602, 3, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vexpanddm $vD, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (int_ppc_altivec_vexpanddm
+ v2i64:$vB))]>;
+ def VEXPANDQM : VXForm_RD5_XO5_RS5<1602, 4, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vexpandqm $vD, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vexpandqm
+ v1i128:$vB))]>;
+ def MTVSRBM : VXForm_RD5_XO5_RS5<1602, 16, (outs vrrc:$vD), (ins g8rc:$rB),
+ "mtvsrbm $vD, $rB", IIC_VecGeneral,
+ [(set v16i8:$vD,
+ (int_ppc_altivec_mtvsrbm i64:$rB))]>;
+ def MTVSRHM : VXForm_RD5_XO5_RS5<1602, 17, (outs vrrc:$vD), (ins g8rc:$rB),
+ "mtvsrhm $vD, $rB", IIC_VecGeneral,
+ [(set v8i16:$vD,
+ (int_ppc_altivec_mtvsrhm i64:$rB))]>;
+ def MTVSRWM : VXForm_RD5_XO5_RS5<1602, 18, (outs vrrc:$vD), (ins g8rc:$rB),
+ "mtvsrwm $vD, $rB", IIC_VecGeneral,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_mtvsrwm i64:$rB))]>;
+ def MTVSRDM : VXForm_RD5_XO5_RS5<1602, 19, (outs vrrc:$vD), (ins g8rc:$rB),
+ "mtvsrdm $vD, $rB", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_mtvsrdm i64:$rB))]>;
+ def MTVSRQM : VXForm_RD5_XO5_RS5<1602, 20, (outs vrrc:$vD), (ins g8rc:$rB),
+ "mtvsrqm $vD, $rB", IIC_VecGeneral,
+ [(set v1i128:$vD,
+ (int_ppc_altivec_mtvsrqm i64:$rB))]>;
+ def MTVSRBMI : DXForm<4, 10, (outs vrrc:$vD), (ins u16imm64:$D),
+ "mtvsrbmi $vD, $D", IIC_VecGeneral,
+ [(set v16i8:$vD,
+ (int_ppc_altivec_mtvsrbm imm:$D))]>;
+ def VCNTMBB : VXForm_RD5_MP_VB5<1602, 12, (outs g8rc:$rD),
+ (ins vrrc:$vB, u1imm:$MP),
+ "vcntmbb $rD, $vB, $MP", IIC_VecGeneral,
+ [(set i64:$rD, (int_ppc_altivec_vcntmbb
+ v16i8:$vB, timm:$MP))]>;
+ def VCNTMBH : VXForm_RD5_MP_VB5<1602, 13, (outs g8rc:$rD),
+ (ins vrrc:$vB, u1imm:$MP),
+ "vcntmbh $rD, $vB, $MP", IIC_VecGeneral,
+ [(set i64:$rD, (int_ppc_altivec_vcntmbh
+ v8i16:$vB, timm:$MP))]>;
+ def VCNTMBW : VXForm_RD5_MP_VB5<1602, 14, (outs g8rc:$rD),
+ (ins vrrc:$vB, u1imm:$MP),
+ "vcntmbw $rD, $vB, $MP", IIC_VecGeneral,
+ [(set i64:$rD, (int_ppc_altivec_vcntmbw
+ v4i32:$vB, timm:$MP))]>;
+ def VCNTMBD : VXForm_RD5_MP_VB5<1602, 15, (outs g8rc:$rD),
+ (ins vrrc:$vB, u1imm:$MP),
+ "vcntmbd $rD, $vB, $MP", IIC_VecGeneral,
+ [(set i64:$rD, (int_ppc_altivec_vcntmbd
+ v2i64:$vB, timm:$MP))]>;
+ def VEXTDUBVLX : VAForm_1a<24, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
+ "vextdubvlx $vD, $vA, $vB, $rC",
+ IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vextdubvlx v16i8:$vA,
+ v16i8:$vB,
+ i32:$rC))]>;
+ def VEXTDUBVRX : VAForm_1a<25, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
+ "vextdubvrx $vD, $vA, $vB, $rC",
+ IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vextdubvrx v16i8:$vA,
+ v16i8:$vB,
+ i32:$rC))]>;
+ def VEXTDUHVLX : VAForm_1a<26, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
+ "vextduhvlx $vD, $vA, $vB, $rC",
+ IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vextduhvlx v8i16:$vA,
+ v8i16:$vB,
+ i32:$rC))]>;
+ def VEXTDUHVRX : VAForm_1a<27, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
+ "vextduhvrx $vD, $vA, $vB, $rC",
+ IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vextduhvrx v8i16:$vA,
+ v8i16:$vB,
+ i32:$rC))]>;
+ def VEXTDUWVLX : VAForm_1a<28, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
+ "vextduwvlx $vD, $vA, $vB, $rC",
+ IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vextduwvlx v4i32:$vA,
+ v4i32:$vB,
+ i32:$rC))]>;
+ def VEXTDUWVRX : VAForm_1a<29, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
+ "vextduwvrx $vD, $vA, $vB, $rC",
+ IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vextduwvrx v4i32:$vA,
+ v4i32:$vB,
+ i32:$rC))]>;
+ def VEXTDDVLX : VAForm_1a<30, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
+ "vextddvlx $vD, $vA, $vB, $rC",
+ IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vextddvlx v2i64:$vA,
+ v2i64:$vB,
+ i32:$rC))]>;
+ def VEXTDDVRX : VAForm_1a<31, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
+ "vextddvrx $vD, $vA, $vB, $rC",
+ IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vextddvrx v2i64:$vA,
+ v2i64:$vB,
+ i32:$rC))]>;
def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vpdepd $vD, $vA, $vB", IIC_VecGeneral,
[(set v2i64:$vD,
@@ -961,7 +2238,61 @@ let Predicates = [IsISA3_1] in {
"vclrrb $vD, $vA, $rB", IIC_VecGeneral,
[(set v16i8:$vD,
(int_ppc_altivec_vclrrb v16i8:$vA, i32:$rB))]>;
-
+ def VMULLD : VXForm_1<457, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmulld $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (mul v2i64:$vA, v2i64:$vB))]>;
+ def VMULHSW : VXForm_1<905, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmulhsw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (mulhs v4i32:$vA, v4i32:$vB))]>;
+ def VMULHUW : VXForm_1<649, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmulhuw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (mulhu v4i32:$vA, v4i32:$vB))]>;
+ def VMULHSD : VXForm_1<969, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmulhsd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (mulhs v2i64:$vA, v2i64:$vB))]>;
+ def VMULHUD : VXForm_1<713, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmulhud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (mulhu v2i64:$vA, v2i64:$vB))]>;
+ def VMODSW : VXForm_1<1931, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmodsw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (srem v4i32:$vA, v4i32:$vB))]>;
+ def VMODUW : VXForm_1<1675, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmoduw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (urem v4i32:$vA, v4i32:$vB))]>;
+ def VMODSD : VXForm_1<1995, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmodsd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (srem v2i64:$vA, v2i64:$vB))]>;
+ def VMODUD : VXForm_1<1739, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmodud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (urem v2i64:$vA, v2i64:$vB))]>;
+ def VDIVSW : VXForm_1<395, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivsw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (sdiv v4i32:$vA, v4i32:$vB))]>;
+ def VDIVUW : VXForm_1<139, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivuw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (udiv v4i32:$vA, v4i32:$vB))]>;
+ def VDIVSD : VXForm_1<459, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivsd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (sdiv v2i64:$vA, v2i64:$vB))]>;
+ def VDIVUD : VXForm_1<203, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (udiv v2i64:$vA, v2i64:$vB))]>;
+ def VDIVESW : VXForm_1<907, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivesw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (int_ppc_altivec_vdivesw v4i32:$vA,
+ v4i32:$vB))]>;
+ def VDIVEUW : VXForm_1<651, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdiveuw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (int_ppc_altivec_vdiveuw v4i32:$vA,
+ v4i32:$vB))]>;
+ def VDIVESD : VXForm_1<971, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivesd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (int_ppc_altivec_vdivesd v2i64:$vA,
+ v2i64:$vB))]>;
+ def VDIVEUD : VXForm_1<715, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdiveud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (int_ppc_altivec_vdiveud v2i64:$vA,
+ v2i64:$vB))]>;
def XVTLSBB : XX2_BF3_XO5_XB6_XO9<60, 2, 475, (outs crrc:$BF), (ins vsrc:$XB),
"xvtlsbb $BF, $XB", IIC_VecGeneral, []>;
@@ -980,10 +2311,204 @@ let Predicates = [IsISA3_1] in {
def STXVRWX : X_XS6_RA5_RB5<31, 205, "stxvrwx", vsrc, []>;
def STXVRDX : X_XS6_RA5_RB5<31, 237, "stxvrdx", vsrc, []>;
}
+
+ def VMULESD : VXForm_1<968, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmulesd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmulesd v2i64:$vA,
+ v2i64:$vB))]>;
+ def VMULEUD : VXForm_1<712, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmuleud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmuleud v2i64:$vA,
+ v2i64:$vB))]>;
+ def VMULOSD : VXForm_1<456, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmulosd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmulosd v2i64:$vA,
+ v2i64:$vB))]>;
+ def VMULOUD : VXForm_1<200, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmuloud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmuloud v2i64:$vA,
+ v2i64:$vB))]>;
+ def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
+ "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmsumcud
+ v2i64:$vA, v2i64:$vB, v1i128:$vC))]>;
+ def VDIVSQ : VXForm_1<267, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivsq $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (sdiv v1i128:$vA, v1i128:$vB))]>;
+ def VDIVUQ : VXForm_1<11, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivuq $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (udiv v1i128:$vA, v1i128:$vB))]>;
+ def VDIVESQ : VXForm_1<779, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdivesq $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vdivesq v1i128:$vA,
+ v1i128:$vB))]>;
+ def VDIVEUQ : VXForm_1<523, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vdiveuq $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vdiveuq v1i128:$vA,
+ v1i128:$vB))]>;
+ def VCMPEQUQ : VCMP <455, "vcmpequq $vD, $vA, $vB" , v1i128>;
+ def VCMPGTSQ : VCMP <903, "vcmpgtsq $vD, $vA, $vB" , v1i128>;
+ def VCMPGTUQ : VCMP <647, "vcmpgtuq $vD, $vA, $vB" , v1i128>;
+ def VCMPEQUQ_rec : VCMP_rec <455, "vcmpequq. $vD, $vA, $vB" , v1i128>;
+ def VCMPGTSQ_rec : VCMP_rec <903, "vcmpgtsq. $vD, $vA, $vB" , v1i128>;
+ def VCMPGTUQ_rec : VCMP_rec <647, "vcmpgtuq. $vD, $vA, $vB" , v1i128>;
+ def VMODSQ : VXForm_1<1803, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmodsq $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (srem v1i128:$vA, v1i128:$vB))]>;
+ def VMODUQ : VXForm_1<1547, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmoduq $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (urem v1i128:$vA, v1i128:$vB))]>;
+ def VEXTSD2Q : VXForm_RD5_XO5_RS5<1538, 27, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vextsd2q $vD, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vextsd2q v2i64:$vB))]>;
+ def VCMPUQ : VXForm_BF3_VAB5<257, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
+ "vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>;
+ def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
+ "vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>;
+ def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm",
+ [(set v1i128:$vD,
+ (int_ppc_altivec_vrlqnm v1i128:$vA,
+ v1i128:$vB))]>;
+ def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
+ "vrlqmi $vD, $vA, $vB", IIC_VecFP,
+ [(set v1i128:$vD,
+ (int_ppc_altivec_vrlqmi v1i128:$vA, v1i128:$vB,
+ v1i128:$vDi))]>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+ def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>;
+ def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>;
+ def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>;
+ def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
+ def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>;
+ def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>;
+ def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>;
+ def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>;
+}
+
+let Predicates = [IsISA3_1, HasVSX] in {
+ def XVCVSPBF16 : XX2_XT6_XO5_XB6<60, 17, 475, "xvcvspbf16", vsrc, []>;
+ def XVCVBF16SPN : XX2_XT6_XO5_XB6<60, 16, 475, "xvcvbf16spn", vsrc, []>;
+}
+
+// Multiclass defining patterns for Set Boolean Extension Reverse Instructions.
+// This is analogous to the CRNotPat multiclass but specifically for Power10
+// and newer subtargets since the extended forms use Set Boolean instructions.
+// The first two anonymous patterns defined are actually a duplicate of those
+// in CRNotPat, but it is preferable to define both multiclasses as complete
+// ones rather than pulling that small common section out.
+multiclass P10ReverseSetBool<dag pattern, dag result> {
+ def : Pat<pattern, (crnot result)>;
+ def : Pat<(not pattern), result>;
+
+ def : Pat<(i32 (zext pattern)),
+ (SETBCR result)>;
+ def : Pat<(i64 (zext pattern)),
+ (SETBCR8 result)>;
+
+ def : Pat<(i32 (sext pattern)),
+ (SETNBCR result)>;
+ def : Pat<(i64 (sext pattern)),
+ (SETNBCR8 result)>;
+
+ def : Pat<(i32 (anyext pattern)),
+ (SETBCR result)>;
+ def : Pat<(i64 (anyext pattern)),
+ (SETBCR8 result)>;
+}
+
+multiclass IntSetP10RevSetBool<SDNode SetCC, ValueType Ty, ImmLeaf ZExtTy,
+ ImmLeaf SExtTy, PatLeaf Cmpi, PatLeaf Cmpli,
+ PatLeaf Cmp, PatLeaf Cmpl> {
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)),
+ (EXTRACT_SUBREG (Cmpl $s1, $s2), sub_lt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)),
+ (EXTRACT_SUBREG (Cmp $s1, $s2), sub_lt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)),
+ (EXTRACT_SUBREG (Cmpl $s1, $s2), sub_gt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)),
+ (EXTRACT_SUBREG (Cmp $s1, $s2), sub_gt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)),
+ (EXTRACT_SUBREG (Cmp $s1, $s2), sub_eq)>;
+
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETUGE)),
+ (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_lt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETGE)),
+ (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_lt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETULE)),
+ (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_gt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETLE)),
+ (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_gt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, SExtTy:$imm, SETNE)),
+ (EXTRACT_SUBREG (Cmpi $s1, imm:$imm), sub_eq)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, ZExtTy:$imm, SETNE)),
+ (EXTRACT_SUBREG (Cmpli $s1, imm:$imm), sub_eq)>;
+}
+
+multiclass FSetP10RevSetBool<SDNode SetCC, ValueType Ty, PatLeaf FCmp> {
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETUNE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+ defm : P10ReverseSetBool<(i1 (SetCC Ty:$s1, Ty:$s2, SETO)),
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>;
+}
+
+let Predicates = [IsISA3_1] in {
+ def : Pat<(i32 (zext i1:$in)),
+ (SETBC $in)>;
+ def : Pat<(i64 (zext i1:$in)),
+ (SETBC8 $in)>;
+ def : Pat<(i32 (sext i1:$in)),
+ (SETNBC $in)>;
+ def : Pat<(i64 (sext i1:$in)),
+ (SETNBC8 $in)>;
+ def : Pat<(i32 (anyext i1:$in)),
+ (SETBC $in)>;
+ def : Pat<(i64 (anyext i1:$in)),
+ (SETBC8 $in)>;
+
+ // Instantiation of the set boolean reverse patterns for 32-bit integers.
+ defm : IntSetP10RevSetBool<setcc, i32, immZExt16, imm32SExt16,
+ CMPWI, CMPLWI, CMPW, CMPLW>;
+ defm : P10ReverseSetBool<(i1 (setcc i32:$s1, imm:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+ // Instantiation of the set boolean reverse patterns for 64-bit integers.
+ defm : IntSetP10RevSetBool<setcc, i64, immZExt16, imm64SExt16,
+ CMPDI, CMPLDI, CMPD, CMPLD>;
+ defm : P10ReverseSetBool<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+}
+
+// Instantiation of the set boolean reverse patterns for f32, f64, f128.
+let Predicates = [IsISA3_1, HasFPU] in {
+ defm : FSetP10RevSetBool<setcc, f32, FCMPUS>;
+ defm : FSetP10RevSetBool<setcc, f64, FCMPUD>;
+ defm : FSetP10RevSetBool<setcc, f128, XSCMPUQP>;
}
//---------------------------- Anonymous Patterns ----------------------------//
let Predicates = [IsISA3_1] in {
+ // Exploit the vector multiply high instructions using intrinsics.
+ def : Pat<(v4i32 (int_ppc_altivec_vmulhsw v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VMULHSW $vA, $vB))>;
+ def : Pat<(v4i32 (int_ppc_altivec_vmulhuw v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VMULHUW $vA, $vB))>;
+ def : Pat<(v2i64 (int_ppc_altivec_vmulhsd v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VMULHSD $vA, $vB))>;
+ def : Pat<(v2i64 (int_ppc_altivec_vmulhud v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VMULHUD $vA, $vB))>;
def : Pat<(v16i8 (int_ppc_vsx_xxgenpcvbm v16i8:$VRB, imm:$IMM)),
(v16i8 (COPY_TO_REGCLASS (XXGENPCVBM $VRB, imm:$IMM), VRRC))>;
def : Pat<(v8i16 (int_ppc_vsx_xxgenpcvhm v8i16:$VRB, imm:$IMM)),
@@ -992,12 +2517,82 @@ let Predicates = [IsISA3_1] in {
(v4i32 (COPY_TO_REGCLASS (XXGENPCVWM $VRB, imm:$IMM), VRRC))>;
def : Pat<(v2i64 (int_ppc_vsx_xxgenpcvdm v2i64:$VRB, imm:$IMM)),
(v2i64 (COPY_TO_REGCLASS (XXGENPCVDM $VRB, imm:$IMM), VRRC))>;
- def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, -1)),
+ def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 1)),
(EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>;
def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
(EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
+
+ def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;
+ def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>;
+ def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
+ def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;
+
+ def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
+ (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
+
+ def : Pat <(v2i64 (PPCxxsplti32dx v2i64:$XT, i32:$XI, i32:$IMM32)),
+ (v2i64 (XXSPLTI32DX v2i64:$XT, i32:$XI, i32:$IMM32))>;
+}
+
+let Predicates = [IsISA3_1, HasVSX] in {
+ def : Pat<(v16i8 (int_ppc_vsx_xvcvspbf16 v16i8:$XA)),
+ (COPY_TO_REGCLASS (XVCVSPBF16 RCCp.AToVSRC), VRRC)>;
+ def : Pat<(v16i8 (int_ppc_vsx_xvcvbf16spn v16i8:$XA)),
+ (COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>;
}
+let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in {
+ // Store element 0 of a VSX register to memory
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$src, 0)), xoaddr:$dst),
+ (STXVRBX (COPY_TO_REGCLASS v16i8:$src, VSRC), xoaddr:$dst)>;
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$src, 0)), xoaddr:$dst),
+ (STXVRHX (COPY_TO_REGCLASS v8i16:$src, VSRC), xoaddr:$dst)>;
+ def : Pat<(store (i32 (extractelt v4i32:$src, 0)), xoaddr:$dst),
+ (STXVRWX $src, xoaddr:$dst)>;
+ def : Pat<(store (f32 (extractelt v4f32:$src, 0)), xoaddr:$dst),
+ (STXVRWX $src, xoaddr:$dst)>;
+ def : Pat<(store (i64 (extractelt v2i64:$src, 0)), xoaddr:$dst),
+ (STXVRDX $src, xoaddr:$dst)>;
+ def : Pat<(store (f64 (extractelt v2f64:$src, 0)), xoaddr:$dst),
+ (STXVRDX $src, xoaddr:$dst)>;
+ }
+
+// FIXME: The swap is overkill when the shift amount is a constant.
+// We should just fix the constant in the DAG.
+let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in {
+ def : Pat<(v1i128 (shl v1i128:$VRA, v1i128:$VRB)),
+ (v1i128 (VSLQ v1i128:$VRA,
+ (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+ (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+ def : Pat<(v1i128 (PPCshl v1i128:$VRA, v1i128:$VRB)),
+ (v1i128 (VSLQ v1i128:$VRA,
+ (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+ (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+ def : Pat<(v1i128 (srl v1i128:$VRA, v1i128:$VRB)),
+ (v1i128 (VSRQ v1i128:$VRA,
+ (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+ (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+ def : Pat<(v1i128 (PPCsrl v1i128:$VRA, v1i128:$VRB)),
+ (v1i128 (VSRQ v1i128:$VRA,
+ (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+ (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+ def : Pat<(v1i128 (sra v1i128:$VRA, v1i128:$VRB)),
+ (v1i128 (VSRAQ v1i128:$VRA,
+ (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+ (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+ def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)),
+ (v1i128 (VSRAQ v1i128:$VRA,
+ (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+ (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+}
+
+class xxevalPattern <dag pattern, bits<8> imm> :
+ Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {}
+
let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A,
i32immNonAllOneNonZero:$A,
@@ -1010,6 +2605,44 @@ let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
def : Pat<(f64 nzFPImmAsi32:$A),
(COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
VSFRC)>;
+
+ // Anonymous patterns for XXEVAL
+ // AND
+ // and(A, B, C)
+ def : xxevalPattern<(and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>;
+ // and(A, xor(B, C))
+ def : xxevalPattern<(and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>;
+ // and(A, or(B, C))
+ def : xxevalPattern<(and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>;
+ // and(A, nor(B, C))
+ def : xxevalPattern<(and v4i32:$vA, (vnot_ppc (or v4i32:$vB, v4i32:$vC))),
+ 8>;
+ // and(A, eqv(B, C))
+ def : xxevalPattern<(and v4i32:$vA, (vnot_ppc (xor v4i32:$vB, v4i32:$vC))),
+ 9>;
+ // and(A, nand(B, C))
+ def : xxevalPattern<(and v4i32:$vA, (vnot_ppc (and v4i32:$vB, v4i32:$vC))),
+ 14>;
+
+ // NAND
+ // nand(A, B, C)
+ def : xxevalPattern<(vnot_ppc (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))),
+ !sub(255, 1)>;
+ // nand(A, xor(B, C))
+ def : xxevalPattern<(vnot_ppc (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))),
+ !sub(255, 6)>;
+ // nand(A, or(B, C))
+ def : xxevalPattern<(vnot_ppc (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))),
+ !sub(255, 7)>;
+ // nand(A, nor(B, C))
+ def : xxevalPattern<(or (vnot_ppc v4i32:$vA), (or v4i32:$vB, v4i32:$vC)),
+ !sub(255, 8)>;
+ // nand(A, eqv(B, C))
+ def : xxevalPattern<(or (vnot_ppc v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)),
+ !sub(255, 9)>;
+ // nand(A, nand(B, C))
+ def : xxevalPattern<(or (vnot_ppc v4i32:$vA), (and v4i32:$vB, v4i32:$vC)),
+ !sub(255, 14)>;
}
let Predicates = [PrefixInstrs] in {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/llvm/lib/Target/PowerPC/PPCInstrQPX.td
deleted file mode 100644
index 2265af2815cb..000000000000
--- a/llvm/lib/Target/PowerPC/PPCInstrQPX.td
+++ /dev/null
@@ -1,1212 +0,0 @@
-//===- PPCInstrQPX.td - The PowerPC QPX Extension --*- tablegen -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the QPX extension to the PowerPC instruction set.
-// Reference:
-// Book Q: QPX Architecture Definition. IBM (as updated in) 2011.
-//
-//===----------------------------------------------------------------------===//
-
-def PPCRegQFRCAsmOperand : AsmOperandClass {
- let Name = "RegQFRC"; let PredicateMethod = "isRegNumber";
-}
-def qfrc : RegisterOperand<QFRC> {
- let ParserMatchClass = PPCRegQFRCAsmOperand;
-}
-def PPCRegQSRCAsmOperand : AsmOperandClass {
- let Name = "RegQSRC"; let PredicateMethod = "isRegNumber";
-}
-def qsrc : RegisterOperand<QSRC> {
- let ParserMatchClass = PPCRegQSRCAsmOperand;
-}
-def PPCRegQBRCAsmOperand : AsmOperandClass {
- let Name = "RegQBRC"; let PredicateMethod = "isRegNumber";
-}
-def qbrc : RegisterOperand<QBRC> {
- let ParserMatchClass = PPCRegQBRCAsmOperand;
-}
-
-//===----------------------------------------------------------------------===//
-// Helpers for defining instructions that directly correspond to intrinsics.
-
-// QPXA1_Int - A AForm_1 intrinsic definition.
-class QPXA1_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
- : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
- !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_FPFused,
- [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
-// QPXA1s_Int - A AForm_1 intrinsic definition (simple instructions).
-class QPXA1s_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
- : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
- !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_VecPerm,
- [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
-// QPXA2_Int - A AForm_2 intrinsic definition.
-class QPXA2_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
- : AForm_2<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPGeneral,
- [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>;
-// QPXA3_Int - A AForm_3 intrinsic definition.
-class QPXA3_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
- : AForm_3<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC),
- !strconcat(opc, " $FRT, $FRA, $FRC"), IIC_FPGeneral,
- [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRC))]>;
-// QPXA4_Int - A AForm_4a intrinsic definition.
-class QPXA4_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
- : AForm_4a<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB),
- !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral,
- [(set v4f64:$FRT, (IntID v4f64:$FRB))]>;
-// QPXX18_Int - A XForm_18 intrinsic definition.
-class QPXX18_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID>
- : XForm_18<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPCompare,
- [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>;
-// QPXX19_Int - A XForm_19 intrinsic definition.
-class QPXX19_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID>
- : XForm_19<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB),
- !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral,
- [(set v4f64:$FRT, (IntID v4f64:$FRB))]>;
-
-//===----------------------------------------------------------------------===//
-// Pattern Frags.
-
-def extloadv4f32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4f32;
-}]>;
-
-def truncstorev4f32 : PatFrag<(ops node:$val, node:$ptr),
- (truncstore node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
-}]>;
-def pre_truncstv4f32 : PatFrag<(ops node:$val, node:$base, node:$offset),
- (pre_truncst node:$val,
- node:$base, node:$offset), [{
- return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
-}]>;
-
-def fround_inexact : PatFrag<(ops node:$val), (fpround node:$val), [{
- return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 0;
-}]>;
-
-def fround_exact : PatFrag<(ops node:$val), (fpround node:$val), [{
- return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 1;
-}]>;
-
-let FastIselShouldIgnore = 1 in // FastIsel should ignore all u12 instrs.
- def u12 : ImmLeaf<i32, [{ return (Imm & 0xFFF) == Imm; }]>;
-
-//===----------------------------------------------------------------------===//
-// Instruction Definitions.
-
-def HasQPX : Predicate<"Subtarget->hasQPX()">;
-let Predicates = [HasQPX] in {
-let DecoderNamespace = "QPX" in {
-let hasSideEffects = 0 in { // QPX instructions don't have side effects.
-let Uses = [RM] in {
- // Add Instructions
- let isCommutable = 1 in {
- def QVFADD : AForm_2<4, 21,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- "qvfadd $FRT, $FRA, $FRB", IIC_FPGeneral,
- [(set v4f64:$FRT, (fadd v4f64:$FRA, v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFADDS : QPXA2_Int<0, 21, "qvfadds", int_ppc_qpx_qvfadds>;
- def QVFADDSs : AForm_2<0, 21,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
- "qvfadds $FRT, $FRA, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (fadd v4f32:$FRA, v4f32:$FRB))]>;
- }
- def QVFSUB : AForm_2<4, 20,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- "qvfsub $FRT, $FRA, $FRB", IIC_FPGeneral,
- [(set v4f64:$FRT, (fsub v4f64:$FRA, v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFSUBS : QPXA2_Int<0, 20, "qvfsubs", int_ppc_qpx_qvfsubs>;
- def QVFSUBSs : AForm_2<0, 20,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
- "qvfsubs $FRT, $FRA, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (fsub v4f32:$FRA, v4f32:$FRB))]>;
-
- // Estimate Instructions
- def QVFRE : AForm_4a<4, 24, (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfre $FRT, $FRB", IIC_FPGeneral,
- [(set v4f64:$FRT, (PPCfre v4f64:$FRB))]>;
- def QVFRES : QPXA4_Int<0, 24, "qvfres", int_ppc_qpx_qvfres>;
- let isCodeGenOnly = 1 in
- def QVFRESs : AForm_4a<0, 24, (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfres $FRT, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (PPCfre v4f32:$FRB))]>;
-
- def QVFRSQRTE : AForm_4a<4, 26, (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfrsqrte $FRT, $FRB", IIC_FPGeneral,
- [(set v4f64:$FRT, (PPCfrsqrte v4f64:$FRB))]>;
- def QVFRSQRTES : QPXA4_Int<0, 26, "qvfrsqrtes", int_ppc_qpx_qvfrsqrtes>;
- let isCodeGenOnly = 1 in
- def QVFRSQRTESs : AForm_4a<0, 26, (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfrsqrtes $FRT, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (PPCfrsqrte v4f32:$FRB))]>;
-
- // Multiply Instructions
- let isCommutable = 1 in {
- def QVFMUL : AForm_3<4, 25,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC),
- "qvfmul $FRT, $FRA, $FRC", IIC_FPGeneral,
- [(set v4f64:$FRT, (fmul v4f64:$FRA, v4f64:$FRC))]>;
- let isCodeGenOnly = 1 in
- def QVFMULS : QPXA3_Int<0, 25, "qvfmuls", int_ppc_qpx_qvfmuls>;
- def QVFMULSs : AForm_3<0, 25,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC),
- "qvfmuls $FRT, $FRA, $FRC", IIC_FPGeneral,
- [(set v4f32:$FRT, (fmul v4f32:$FRA, v4f32:$FRC))]>;
- }
- def QVFXMUL : QPXA3_Int<4, 17, "qvfxmul", int_ppc_qpx_qvfxmul>;
- def QVFXMULS : QPXA3_Int<0, 17, "qvfxmuls", int_ppc_qpx_qvfxmuls>;
-
- // Multiply-add instructions
- def QVFMADD : AForm_1<4, 29,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
- "qvfmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC, v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFMADDS : QPXA1_Int<0, 29, "qvfmadds", int_ppc_qpx_qvfmadds>;
- def QVFMADDSs : AForm_1<0, 29,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
- "qvfmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC, v4f32:$FRB))]>;
- def QVFNMADD : AForm_1<4, 31,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
- "qvfnmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
- v4f64:$FRB)))]>;
- let isCodeGenOnly = 1 in
- def QVFNMADDS : QPXA1_Int<0, 31, "qvfnmadds", int_ppc_qpx_qvfnmadds>;
- def QVFNMADDSs : AForm_1<0, 31,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
- "qvfnmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
- v4f32:$FRB)))]>;
- def QVFMSUB : AForm_1<4, 28,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
- "qvfmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC,
- (fneg v4f64:$FRB)))]>;
- let isCodeGenOnly = 1 in
- def QVFMSUBS : QPXA1_Int<0, 28, "qvfmsubs", int_ppc_qpx_qvfmsubs>;
- def QVFMSUBSs : AForm_1<0, 28,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
- "qvfmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC,
- (fneg v4f32:$FRB)))]>;
- def QVFNMSUB : AForm_1<4, 30,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
- "qvfnmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
- (fneg v4f64:$FRB))))]>;
- let isCodeGenOnly = 1 in
- def QVFNMSUBS : QPXA1_Int<0, 30, "qvfnmsubs", int_ppc_qpx_qvfnmsubs>;
- def QVFNMSUBSs : AForm_1<0, 30,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
- "qvfnmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
- (fneg v4f32:$FRB))))]>;
- def QVFXMADD : QPXA1_Int<4, 9, "qvfxmadd", int_ppc_qpx_qvfxmadd>;
- def QVFXMADDS : QPXA1_Int<0, 9, "qvfxmadds", int_ppc_qpx_qvfxmadds>;
- def QVFXXNPMADD : QPXA1_Int<4, 11, "qvfxxnpmadd", int_ppc_qpx_qvfxxnpmadd>;
- def QVFXXNPMADDS : QPXA1_Int<0, 11, "qvfxxnpmadds", int_ppc_qpx_qvfxxnpmadds>;
- def QVFXXCPNMADD : QPXA1_Int<4, 3, "qvfxxcpnmadd", int_ppc_qpx_qvfxxcpnmadd>;
- def QVFXXCPNMADDS : QPXA1_Int<0, 3, "qvfxxcpnmadds", int_ppc_qpx_qvfxxcpnmadds>;
- def QVFXXMADD : QPXA1_Int<4, 1, "qvfxxmadd", int_ppc_qpx_qvfxxmadd>;
- def QVFXXMADDS : QPXA1_Int<0, 1, "qvfxxmadds", int_ppc_qpx_qvfxxmadds>;
-
- // Select Instruction
- let isCodeGenOnly = 1 in
- def QVFSEL : QPXA1s_Int<4, 23, "qvfsel", int_ppc_qpx_qvfsel>;
- def QVFSELb : AForm_1<4, 23, (outs qfrc:$FRT),
- (ins qbrc:$FRA, qfrc:$FRB, qfrc:$FRC),
- "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
- [(set v4f64:$FRT, (vselect v4i1:$FRA,
- v4f64:$FRC, v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFSELbs : AForm_1<4, 23, (outs qsrc:$FRT),
- (ins qbrc:$FRA, qsrc:$FRB, qsrc:$FRC),
- "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
- [(set v4f32:$FRT, (vselect v4i1:$FRA,
- v4f32:$FRC, v4f32:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFSELbb: AForm_1<4, 23, (outs qbrc:$FRT),
- (ins qbrc:$FRA, qbrc:$FRB, qbrc:$FRC),
- "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
- [(set v4i1:$FRT, (vselect v4i1:$FRA,
- v4i1:$FRC, v4i1:$FRB))]>;
-
- // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
- // instruction selection into a branch sequence.
- def SELECT_CC_QFRC: PPCCustomInserterPseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QFRC",
- []>;
- def SELECT_CC_QSRC: PPCCustomInserterPseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QSRC",
- []>;
- def SELECT_CC_QBRC: PPCCustomInserterPseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QBRC",
- []>;
-
- // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
- // register bit directly.
- def SELECT_QFRC: PPCCustomInserterPseudo<(outs qfrc:$dst), (ins crbitrc:$cond,
- qfrc:$T, qfrc:$F), "#SELECT_QFRC",
- [(set v4f64:$dst,
- (select i1:$cond, v4f64:$T, v4f64:$F))]>;
- def SELECT_QSRC: PPCCustomInserterPseudo<(outs qsrc:$dst), (ins crbitrc:$cond,
- qsrc:$T, qsrc:$F), "#SELECT_QSRC",
- [(set v4f32:$dst,
- (select i1:$cond, v4f32:$T, v4f32:$F))]>;
- def SELECT_QBRC: PPCCustomInserterPseudo<(outs qbrc:$dst), (ins crbitrc:$cond,
- qbrc:$T, qbrc:$F), "#SELECT_QBRC",
- [(set v4i1:$dst,
- (select i1:$cond, v4i1:$T, v4i1:$F))]>;
-
- // Convert and Round Instructions
- def QVFCTID : QPXX19_Int<4, 814, "qvfctid", int_ppc_qpx_qvfctid>;
- let isCodeGenOnly = 1 in
- def QVFCTIDb : XForm_19<4, 814, (outs qbrc:$FRT), (ins qbrc:$FRB),
- "qvfctid $FRT, $FRB", IIC_FPGeneral, []>;
-
- def QVFCTIDU : QPXX19_Int<4, 942, "qvfctidu", int_ppc_qpx_qvfctidu>;
- def QVFCTIDZ : QPXX19_Int<4, 815, "qvfctidz", int_ppc_qpx_qvfctidz>;
- def QVFCTIDUZ : QPXX19_Int<4, 943, "qvfctiduz", int_ppc_qpx_qvfctiduz>;
- def QVFCTIW : QPXX19_Int<4, 14, "qvfctiw", int_ppc_qpx_qvfctiw>;
- def QVFCTIWU : QPXX19_Int<4, 142, "qvfctiwu", int_ppc_qpx_qvfctiwu>;
- def QVFCTIWZ : QPXX19_Int<4, 15, "qvfctiwz", int_ppc_qpx_qvfctiwz>;
- def QVFCTIWUZ : QPXX19_Int<4, 143, "qvfctiwuz", int_ppc_qpx_qvfctiwuz>;
- def QVFCFID : QPXX19_Int<4, 846, "qvfcfid", int_ppc_qpx_qvfcfid>;
- let isCodeGenOnly = 1 in
- def QVFCFIDb : XForm_19<4, 846, (outs qbrc:$FRT), (ins qbrc:$FRB),
- "qvfcfid $FRT, $FRB", IIC_FPGeneral, []>;
-
- def QVFCFIDU : QPXX19_Int<4, 974, "qvfcfidu", int_ppc_qpx_qvfcfidu>;
- def QVFCFIDS : QPXX19_Int<0, 846, "qvfcfids", int_ppc_qpx_qvfcfids>;
- def QVFCFIDUS : QPXX19_Int<0, 974, "qvfcfidus", int_ppc_qpx_qvfcfidus>;
-
- let isCodeGenOnly = 1 in
- def QVFRSP : QPXX19_Int<4, 12, "qvfrsp", int_ppc_qpx_qvfrsp>;
- def QVFRSPs : XForm_19<4, 12,
- (outs qsrc:$FRT), (ins qfrc:$FRB),
- "qvfrsp $FRT, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (fround_inexact v4f64:$FRB))]>;
-
- def QVFRIZ : XForm_19<4, 424, (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfriz $FRT, $FRB", IIC_FPGeneral,
- [(set v4f64:$FRT, (ftrunc v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFRIZs : XForm_19<4, 424, (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfriz $FRT, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (ftrunc v4f32:$FRB))]>;
-
- def QVFRIN : XForm_19<4, 392, (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfrin $FRT, $FRB", IIC_FPGeneral,
- [(set v4f64:$FRT, (fround v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFRINs : XForm_19<4, 392, (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfrin $FRT, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (fround v4f32:$FRB))]>;
-
- def QVFRIP : XForm_19<4, 456, (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfrip $FRT, $FRB", IIC_FPGeneral,
- [(set v4f64:$FRT, (fceil v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFRIPs : XForm_19<4, 456, (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfrip $FRT, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (fceil v4f32:$FRB))]>;
-
- def QVFRIM : XForm_19<4, 488, (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfrim $FRT, $FRB", IIC_FPGeneral,
- [(set v4f64:$FRT, (ffloor v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFRIMs : XForm_19<4, 488, (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfrim $FRT, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (ffloor v4f32:$FRB))]>;
-
- // Move Instructions
- def QVFMR : XForm_19<4, 72,
- (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfmr $FRT, $FRB", IIC_VecPerm,
- [/* (set v4f64:$FRT, v4f64:$FRB) */]>;
- let isCodeGenOnly = 1 in {
- def QVFMRs : XForm_19<4, 72,
- (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfmr $FRT, $FRB", IIC_VecPerm,
- [/* (set v4f32:$FRT, v4f32:$FRB) */]>;
- def QVFMRb : XForm_19<4, 72,
- (outs qbrc:$FRT), (ins qbrc:$FRB),
- "qvfmr $FRT, $FRB", IIC_VecPerm,
- [/* (set v4i1:$FRT, v4i1:$FRB) */]>;
- }
- def QVFNEG : XForm_19<4, 40,
- (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfneg $FRT, $FRB", IIC_VecPerm,
- [(set v4f64:$FRT, (fneg v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFNEGs : XForm_19<4, 40,
- (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfneg $FRT, $FRB", IIC_VecPerm,
- [(set v4f32:$FRT, (fneg v4f32:$FRB))]>;
- def QVFABS : XForm_19<4, 264,
- (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfabs $FRT, $FRB", IIC_VecPerm,
- [(set v4f64:$FRT, (fabs v4f64:$FRB))]>;
- let isCodeGenOnly = 1 in
- def QVFABSs : XForm_19<4, 264,
- (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfabs $FRT, $FRB", IIC_VecPerm,
- [(set v4f32:$FRT, (fabs v4f32:$FRB))]>;
- def QVFNABS : XForm_19<4, 136,
- (outs qfrc:$FRT), (ins qfrc:$FRB),
- "qvfnabs $FRT, $FRB", IIC_VecPerm,
- [(set v4f64:$FRT, (fneg (fabs v4f64:$FRB)))]>;
- let isCodeGenOnly = 1 in
- def QVFNABSs : XForm_19<4, 136,
- (outs qsrc:$FRT), (ins qsrc:$FRB),
- "qvfnabs $FRT, $FRB", IIC_VecPerm,
- [(set v4f32:$FRT, (fneg (fabs v4f32:$FRB)))]>;
- def QVFCPSGN : XForm_18<4, 8,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm,
- [(set v4f64:$FRT, (fcopysign v4f64:$FRB, v4f64:$FRA))]>;
- let isCodeGenOnly = 1 in
- def QVFCPSGNs : XForm_18<4, 8,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
- "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm,
- [(set v4f32:$FRT, (fcopysign v4f32:$FRB, v4f32:$FRA))]>;
-
- def QVALIGNI : Z23Form_1<4, 5,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u2imm:$idx),
- "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
- [(set v4f64:$FRT,
- (PPCqvaligni v4f64:$FRA, v4f64:$FRB,
- (i32 imm:$idx)))]>;
- let isCodeGenOnly = 1 in
- def QVALIGNIs : Z23Form_1<4, 5,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, u2imm:$idx),
- "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
- [(set v4f32:$FRT,
- (PPCqvaligni v4f32:$FRA, v4f32:$FRB,
- (i32 imm:$idx)))]>;
- let isCodeGenOnly = 1 in
- def QVALIGNIb : Z23Form_1<4, 5,
- (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u2imm:$idx),
- "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
- [(set v4i1:$FRT,
- (PPCqvaligni v4i1:$FRA, v4i1:$FRB,
- (i32 imm:$idx)))]>;
-
- def QVESPLATI : Z23Form_2<4, 37,
- (outs qfrc:$FRT), (ins qfrc:$FRA, u2imm:$idx),
- "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
- [(set v4f64:$FRT,
- (PPCqvesplati v4f64:$FRA, (i32 imm:$idx)))]>;
- let isCodeGenOnly = 1 in
- def QVESPLATIs : Z23Form_2<4, 37,
- (outs qsrc:$FRT), (ins qsrc:$FRA, u2imm:$idx),
- "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
- [(set v4f32:$FRT,
- (PPCqvesplati v4f32:$FRA, (i32 imm:$idx)))]>;
- let isCodeGenOnly = 1 in
- def QVESPLATIb : Z23Form_2<4, 37,
- (outs qbrc:$FRT), (ins qbrc:$FRA, u2imm:$idx),
- "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
- [(set v4i1:$FRT,
- (PPCqvesplati v4i1:$FRA, (i32 imm:$idx)))]>;
-
- def QVFPERM : AForm_1<4, 6,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
- "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm,
- [(set v4f64:$FRT,
- (PPCqvfperm v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
- let isCodeGenOnly = 1 in
- def QVFPERMs : AForm_1<4, 6,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qfrc:$FRC),
- "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm,
- [(set v4f32:$FRT,
- (PPCqvfperm v4f32:$FRA, v4f32:$FRB, v4f64:$FRC))]>;
-
- let isReMaterializable = 1, isAsCheapAsAMove = 1 in
- def QVGPCI : Z23Form_3<4, 133,
- (outs qfrc:$FRT), (ins u12imm:$idx),
- "qvgpci $FRT, $idx", IIC_VecPerm,
- [(set v4f64:$FRT, (PPCqvgpci (u12:$idx)))]>;
-
- // Compare Instruction
- let isCodeGenOnly = 1 in
- def QVFTSTNAN : QPXX18_Int<4, 64, "qvftstnan", int_ppc_qpx_qvftstnan>;
- def QVFTSTNANb : XForm_18<4, 64, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare,
- [(set v4i1:$FRT,
- (setcc v4f64:$FRA, v4f64:$FRB, SETUO))]>;
- let isCodeGenOnly = 1 in
- def QVFTSTNANbs : XForm_18<4, 64, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
- "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare,
- [(set v4i1:$FRT,
- (setcc v4f32:$FRA, v4f32:$FRB, SETUO))]>;
- let isCodeGenOnly = 1 in
- def QVFCMPLT : QPXX18_Int<4, 96, "qvfcmplt", int_ppc_qpx_qvfcmplt>;
- def QVFCMPLTb : XForm_18<4, 96, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare,
- [(set v4i1:$FRT,
- (setcc v4f64:$FRA, v4f64:$FRB, SETOLT))]>;
- let isCodeGenOnly = 1 in
- def QVFCMPLTbs : XForm_18<4, 96, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
- "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare,
- [(set v4i1:$FRT,
- (setcc v4f32:$FRA, v4f32:$FRB, SETOLT))]>;
- let isCodeGenOnly = 1 in
- def QVFCMPGT : QPXX18_Int<4, 32, "qvfcmpgt", int_ppc_qpx_qvfcmpgt>;
- def QVFCMPGTb : XForm_18<4, 32, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare,
- [(set v4i1:$FRT,
- (setcc v4f64:$FRA, v4f64:$FRB, SETOGT))]>;
- let isCodeGenOnly = 1 in
- def QVFCMPGTbs : XForm_18<4, 32, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
- "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare,
- [(set v4i1:$FRT,
- (setcc v4f32:$FRA, v4f32:$FRB, SETOGT))]>;
- let isCodeGenOnly = 1 in
- def QVFCMPEQ : QPXX18_Int<4, 0, "qvfcmpeq", int_ppc_qpx_qvfcmpeq>;
- def QVFCMPEQb : XForm_18<4, 0, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
- "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare,
- [(set v4i1:$FRT,
- (setcc v4f64:$FRA, v4f64:$FRB, SETOEQ))]>;
- let isCodeGenOnly = 1 in
- def QVFCMPEQbs : XForm_18<4, 0, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
- "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare,
- [(set v4i1:$FRT,
- (setcc v4f32:$FRA, v4f32:$FRB, SETOEQ))]>;
-
- let isCodeGenOnly = 1 in
- def QVFLOGICAL : XForm_20<4, 4,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u12imm:$tttt),
- "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
- def QVFLOGICALb : XForm_20<4, 4,
- (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt),
- "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
- let isCodeGenOnly = 1 in
- def QVFLOGICALs : XForm_20<4, 4,
- (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt),
- "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
-
- // Load indexed instructions
- let mayLoad = 1 in {
- def QVLFDX : XForm_1_memOp<31, 583,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfdx $FRT, $src", IIC_LdStLFD,
- [(set v4f64:$FRT, (load xoaddr:$src))]>;
- let isCodeGenOnly = 1 in
- def QVLFDXb : XForm_1_memOp<31, 583,
- (outs qbrc:$FRT), (ins memrr:$src),
- "qvlfdx $FRT, $src", IIC_LdStLFD, []>;
-
- let RC = 1 in
- def QVLFDXA : XForm_1<31, 583,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfdxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFDUX : XForm_1<31, 615,
- (outs qfrc:$FRT, ptr_rc_nor0:$ea_result),
- (ins memrr:$src),
- "qvlfdux $FRT, $src", IIC_LdStLFDU, []>,
- RegConstraint<"$src.ptrreg = $ea_result">,
- NoEncode<"$ea_result">;
- let RC = 1 in
- def QVLFDUXA : XForm_1<31, 615,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfduxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFSX : XForm_1_memOp<31, 519,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfsx $FRT, $src", IIC_LdStLFD,
- [(set v4f64:$FRT, (extloadv4f32 xoaddr:$src))]>;
-
- let isCodeGenOnly = 1 in
- def QVLFSXb : XForm_1<31, 519,
- (outs qbrc:$FRT), (ins memrr:$src),
- "qvlfsx $FRT, $src", IIC_LdStLFD,
- [(set v4i1:$FRT, (PPCqvlfsb xoaddr:$src))]>;
- let isCodeGenOnly = 1 in
- def QVLFSXs : XForm_1_memOp<31, 519,
- (outs qsrc:$FRT), (ins memrr:$src),
- "qvlfsx $FRT, $src", IIC_LdStLFD,
- [(set v4f32:$FRT, (load xoaddr:$src))]>;
-
- let RC = 1 in
- def QVLFSXA : XForm_1<31, 519,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfsxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFSUX : XForm_1<31, 551,
- (outs qsrc:$FRT, ptr_rc_nor0:$ea_result),
- (ins memrr:$src),
- "qvlfsux $FRT, $src", IIC_LdStLFDU, []>,
- RegConstraint<"$src.ptrreg = $ea_result">,
- NoEncode<"$ea_result">;
-
- let RC = 1 in
- def QVLFSUXA : XForm_1<31, 551,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfsuxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFCDX : XForm_1<31, 71,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfcdx $FRT, $src", IIC_LdStLFD, []>;
- let RC = 1 in
- def QVLFCDXA : XForm_1<31, 71,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfcdxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFCDUX : XForm_1<31, 103,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfcdux $FRT, $src", IIC_LdStLFD, []>;
- let RC = 1 in
- def QVLFCDUXA : XForm_1<31, 103,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfcduxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFCSX : XForm_1<31, 7,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfcsx $FRT, $src", IIC_LdStLFD, []>;
- let isCodeGenOnly = 1 in
- def QVLFCSXs : XForm_1<31, 7,
- (outs qsrc:$FRT), (ins memrr:$src),
- "qvlfcsx $FRT, $src", IIC_LdStLFD, []>;
-
- let RC = 1 in
- def QVLFCSXA : XForm_1<31, 7,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfcsxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFCSUX : XForm_1<31, 39,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfcsux $FRT, $src", IIC_LdStLFD, []>;
- let RC = 1 in
- def QVLFCSUXA : XForm_1<31, 39,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfcsuxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFIWAX : XForm_1<31, 871,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfiwax $FRT, $src", IIC_LdStLFD, []>;
- let RC = 1 in
- def QVLFIWAXA : XForm_1<31, 871,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfiwaxa $FRT, $src", IIC_LdStLFD, []>;
-
- def QVLFIWZX : XForm_1<31, 839,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfiwzx $FRT, $src", IIC_LdStLFD, []>;
- let RC = 1 in
- def QVLFIWZXA : XForm_1<31, 839,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlfiwzxa $FRT, $src", IIC_LdStLFD, []>;
- }
-
-
- def QVLPCLDX : XForm_1<31, 582,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlpcldx $FRT, $src", IIC_LdStLFD, []>;
- def QVLPCLSX : XForm_1<31, 518,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlpclsx $FRT, $src", IIC_LdStLFD, []>;
- let isCodeGenOnly = 1 in
- def QVLPCLSXint : XForm_11<31, 518,
- (outs qfrc:$FRT), (ins G8RC:$src),
- "qvlpclsx $FRT, 0, $src", IIC_LdStLFD, []>;
- def QVLPCRDX : XForm_1<31, 70,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlpcrdx $FRT, $src", IIC_LdStLFD, []>;
- def QVLPCRSX : XForm_1<31, 6,
- (outs qfrc:$FRT), (ins memrr:$src),
- "qvlpcrsx $FRT, $src", IIC_LdStLFD, []>;
-
- // Store indexed instructions
- let mayStore = 1 in {
- def QVSTFDX : XForm_8_memOp<31, 711,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfdx $FRT, $dst", IIC_LdStSTFD,
- [(store qfrc:$FRT, xoaddr:$dst)]>;
- let isCodeGenOnly = 1 in
- def QVSTFDXb : XForm_8_memOp<31, 711,
- (outs), (ins qbrc:$FRT, memrr:$dst),
- "qvstfdx $FRT, $dst", IIC_LdStSTFD, []>;
-
- let RC = 1 in
- def QVSTFDXA : XForm_8<31, 711,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfdxa $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFDUX : XForm_8<31, 743, (outs ptr_rc_nor0:$ea_res),
- (ins qfrc:$FRT, memrr:$dst),
- "qvstfdux $FRT, $dst", IIC_LdStSTFDU, []>,
- RegConstraint<"$dst.ptrreg = $ea_res">,
- NoEncode<"$ea_res">;
-
- let RC = 1 in
- def QVSTFDUXA : XForm_8<31, 743,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfduxa $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFDXI : XForm_8<31, 709,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfdxi $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFDXIA : XForm_8<31, 709,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfdxia $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFDUXI : XForm_8<31, 741,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfduxi $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFDUXIA : XForm_8<31, 741,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfduxia $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFSX : XForm_8_memOp<31, 647,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfsx $FRT, $dst", IIC_LdStSTFD,
- [(truncstorev4f32 qfrc:$FRT, xoaddr:$dst)]>;
- let isCodeGenOnly = 1 in
- def QVSTFSXs : XForm_8_memOp<31, 647,
- (outs), (ins qsrc:$FRT, memrr:$dst),
- "qvstfsx $FRT, $dst", IIC_LdStSTFD,
- [(store qsrc:$FRT, xoaddr:$dst)]>;
-
- let RC = 1 in
- def QVSTFSXA : XForm_8<31, 647,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfsxa $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFSUX : XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res),
- (ins qsrc:$FRT, memrr:$dst),
- "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>,
- RegConstraint<"$dst.ptrreg = $ea_res">,
- NoEncode<"$ea_res">;
- let isCodeGenOnly = 1 in
- def QVSTFSUXs: XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res),
- (ins qfrc:$FRT, memrr:$dst),
- "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>,
- RegConstraint<"$dst.ptrreg = $ea_res">,
- NoEncode<"$ea_res">;
-
- let RC = 1 in
- def QVSTFSUXA : XForm_8<31, 679,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfsuxa $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFSXI : XForm_8<31, 645,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfsxi $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFSXIA : XForm_8<31, 645,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfsxia $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFSUXI : XForm_8<31, 677,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfsuxi $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFSUXIA : XForm_8<31, 677,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfsuxia $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFCDX : XForm_8<31, 199,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcdx $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFCDXA : XForm_8<31, 199,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcdxa $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFCSX : XForm_8<31, 135,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>;
- let isCodeGenOnly = 1 in
- def QVSTFCSXs : XForm_8<31, 135,
- (outs), (ins qsrc:$FRT, memrr:$dst),
- "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>;
-
- let RC = 1 in
- def QVSTFCSXA : XForm_8<31, 135,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcsxa $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFCDUX : XForm_8<31, 231,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcdux $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFCDUXA : XForm_8<31, 231,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcduxa $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFCSUX : XForm_8<31, 167,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcsux $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFCSUXA : XForm_8<31, 167,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcsuxa $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFCDXI : XForm_8<31, 197,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcdxi $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFCDXIA : XForm_8<31, 197,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcdxia $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFCSXI : XForm_8<31, 133,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcsxi $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFCSXIA : XForm_8<31, 133,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcsxia $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFCDUXI : XForm_8<31, 229,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcduxi $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFCDUXIA : XForm_8<31, 229,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcduxia $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFCSUXI : XForm_8<31, 165,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcsuxi $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFCSUXIA : XForm_8<31, 165,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfcsuxia $FRT, $dst", IIC_LdStSTFD, []>;
-
- def QVSTFIWX : XForm_8<31, 967,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfiwx $FRT, $dst", IIC_LdStSTFD, []>;
- let RC = 1 in
- def QVSTFIWXA : XForm_8<31, 967,
- (outs), (ins qfrc:$FRT, memrr:$dst),
- "qvstfiwxa $FRT, $dst", IIC_LdStSTFD, []>;
- }
-}
-
-} // neverHasSideEffects
-}
-
-def : InstAlias<"qvfclr $FRT",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 0)>;
-def : InstAlias<"qvfand $FRT, $FRA, $FRB",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 1)>;
-def : InstAlias<"qvfandc $FRT, $FRA, $FRB",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 4)>;
-def : InstAlias<"qvfctfb $FRT, $FRA",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 5)>;
-def : InstAlias<"qvfxor $FRT, $FRA, $FRB",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 6)>;
-def : InstAlias<"qvfor $FRT, $FRA, $FRB",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 7)>;
-def : InstAlias<"qvfnor $FRT, $FRA, $FRB",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 8)>;
-def : InstAlias<"qvfequ $FRT, $FRA, $FRB",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 9)>;
-def : InstAlias<"qvfnot $FRT, $FRA",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 10)>;
-def : InstAlias<"qvforc $FRT, $FRA, $FRB",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 13)>;
-def : InstAlias<"qvfnand $FRT, $FRA, $FRB",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 14)>;
-def : InstAlias<"qvfset $FRT",
- (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 15)>;
-
-//===----------------------------------------------------------------------===//
-// Additional QPX Patterns
-//
-
-def : Pat<(v4f64 (scalar_to_vector f64:$A)),
- (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), $A, sub_64)>;
-def : Pat<(v4f32 (scalar_to_vector f32:$A)),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $A, sub_64)>;
-
-def : Pat<(f64 (extractelt v4f64:$S, 0)),
- (EXTRACT_SUBREG $S, sub_64)>;
-def : Pat<(f32 (extractelt v4f32:$S, 0)),
- (EXTRACT_SUBREG $S, sub_64)>;
-
-def : Pat<(f64 (extractelt v4f64:$S, 1)),
- (EXTRACT_SUBREG (QVESPLATI $S, 1), sub_64)>;
-def : Pat<(f64 (extractelt v4f64:$S, 2)),
- (EXTRACT_SUBREG (QVESPLATI $S, 2), sub_64)>;
-def : Pat<(f64 (extractelt v4f64:$S, 3)),
- (EXTRACT_SUBREG (QVESPLATI $S, 3), sub_64)>;
-
-def : Pat<(f32 (extractelt v4f32:$S, 1)),
- (EXTRACT_SUBREG (QVESPLATIs $S, 1), sub_64)>;
-def : Pat<(f32 (extractelt v4f32:$S, 2)),
- (EXTRACT_SUBREG (QVESPLATIs $S, 2), sub_64)>;
-def : Pat<(f32 (extractelt v4f32:$S, 3)),
- (EXTRACT_SUBREG (QVESPLATIs $S, 3), sub_64)>;
-
-def : Pat<(f64 (extractelt v4f64:$S, i64:$F)),
- (EXTRACT_SUBREG (QVFPERM $S, $S,
- (QVLPCLSXint (RLDICR $F, 2,
- /* 63-2 = */ 61))),
- sub_64)>;
-def : Pat<(f32 (extractelt v4f32:$S, i64:$F)),
- (EXTRACT_SUBREG (QVFPERMs $S, $S,
- (QVLPCLSXint (RLDICR $F, 2,
- /* 63-2 = */ 61))),
- sub_64)>;
-
-def : Pat<(int_ppc_qpx_qvfperm v4f64:$A, v4f64:$B, v4f64:$C),
- (QVFPERM $A, $B, $C)>;
-
-def : Pat<(int_ppc_qpx_qvfcpsgn v4f64:$A, v4f64:$B),
- (QVFCPSGN $A, $B)>;
-
-// FCOPYSIGN's operand types need not agree.
-def : Pat<(fcopysign v4f64:$frB, v4f32:$frA),
- (QVFCPSGN (COPY_TO_REGCLASS $frA, QFRC), $frB)>;
-def : Pat<(fcopysign QSRC:$frB, QFRC:$frA),
- (QVFCPSGNs (COPY_TO_REGCLASS $frA, QSRC), $frB)>;
-
-def : Pat<(int_ppc_qpx_qvfneg v4f64:$A), (QVFNEG $A)>;
-def : Pat<(int_ppc_qpx_qvfabs v4f64:$A), (QVFABS $A)>;
-def : Pat<(int_ppc_qpx_qvfnabs v4f64:$A), (QVFNABS $A)>;
-
-def : Pat<(int_ppc_qpx_qvfriz v4f64:$A), (QVFRIZ $A)>;
-def : Pat<(int_ppc_qpx_qvfrin v4f64:$A), (QVFRIN $A)>;
-def : Pat<(int_ppc_qpx_qvfrip v4f64:$A), (QVFRIP $A)>;
-def : Pat<(int_ppc_qpx_qvfrim v4f64:$A), (QVFRIM $A)>;
-
-def : Pat<(int_ppc_qpx_qvfre v4f64:$A), (QVFRE $A)>;
-def : Pat<(int_ppc_qpx_qvfrsqrte v4f64:$A), (QVFRSQRTE $A)>;
-
-def : Pat<(int_ppc_qpx_qvfadd v4f64:$A, v4f64:$B),
- (QVFADD $A, $B)>;
-def : Pat<(int_ppc_qpx_qvfsub v4f64:$A, v4f64:$B),
- (QVFSUB $A, $B)>;
-def : Pat<(int_ppc_qpx_qvfmul v4f64:$A, v4f64:$B),
- (QVFMUL $A, $B)>;
-
-// Additional QVFNMSUB patterns: -a*c + b == -(a*c - b)
-def : Pat<(fma (fneg v4f64:$A), v4f64:$C, v4f64:$B),
- (QVFNMSUB $A, $C, $B)>;
-def : Pat<(fma v4f64:$A, (fneg v4f64:$C), v4f64:$B),
- (QVFNMSUB $A, $C, $B)>;
-def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
- (QVFNMSUBSs $A, $C, $B)>;
-def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
- (QVFNMSUBSs $A, $C, $B)>;
-
-def : Pat<(int_ppc_qpx_qvfmadd v4f64:$A, v4f64:$B, v4f64:$C),
- (QVFMADD $A, $B, $C)>;
-def : Pat<(int_ppc_qpx_qvfnmadd v4f64:$A, v4f64:$B, v4f64:$C),
- (QVFNMADD $A, $B, $C)>;
-def : Pat<(int_ppc_qpx_qvfmsub v4f64:$A, v4f64:$B, v4f64:$C),
- (QVFMSUB $A, $B, $C)>;
-def : Pat<(int_ppc_qpx_qvfnmsub v4f64:$A, v4f64:$B, v4f64:$C),
- (QVFNMSUB $A, $B, $C)>;
-
-def : Pat<(int_ppc_qpx_qvlfd xoaddr:$src),
- (QVLFDX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src),
- (QVLFDXA xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfs xoaddr:$src),
- (QVLFSX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src),
- (QVLFSXA xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfcda xoaddr:$src),
- (QVLFCDXA xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfcd xoaddr:$src),
- (QVLFCDX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfcsa xoaddr:$src),
- (QVLFCSXA xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfcs xoaddr:$src),
- (QVLFCSX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src),
- (QVLFDXA xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfiwaa xoaddr:$src),
- (QVLFIWAXA xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfiwa xoaddr:$src),
- (QVLFIWAX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfiwza xoaddr:$src),
- (QVLFIWZXA xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfiwz xoaddr:$src),
- (QVLFIWZX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src),
- (QVLFSXA xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlpcld xoaddr:$src),
- (QVLPCLDX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlpcls xoaddr:$src),
- (QVLPCLSX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlpcrd xoaddr:$src),
- (QVLPCRDX xoaddr:$src)>;
-def : Pat<(int_ppc_qpx_qvlpcrs xoaddr:$src),
- (QVLPCRSX xoaddr:$src)>;
-
-def : Pat<(int_ppc_qpx_qvstfd v4f64:$T, xoaddr:$dst),
- (QVSTFDX $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfs v4f64:$T, xoaddr:$dst),
- (QVSTFSX $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfcda v4f64:$T, xoaddr:$dst),
- (QVSTFCDXA $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfcd v4f64:$T, xoaddr:$dst),
- (QVSTFCDX $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfcsa v4f64:$T, xoaddr:$dst),
- (QVSTFCSXA $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfcs v4f64:$T, xoaddr:$dst),
- (QVSTFCSX $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfda v4f64:$T, xoaddr:$dst),
- (QVSTFDXA $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfiwa v4f64:$T, xoaddr:$dst),
- (QVSTFIWXA $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfiw v4f64:$T, xoaddr:$dst),
- (QVSTFIWX $T, xoaddr:$dst)>;
-def : Pat<(int_ppc_qpx_qvstfsa v4f64:$T, xoaddr:$dst),
- (QVSTFSXA $T, xoaddr:$dst)>;
-
-def : Pat<(pre_store v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
- (QVSTFDUX $rS, $ptrreg, $ptroff)>;
-def : Pat<(pre_store v4f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
- (QVSTFSUX $rS, $ptrreg, $ptroff)>;
-def : Pat<(pre_truncstv4f32 v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
- (QVSTFSUXs $rS, $ptrreg, $ptroff)>;
-
-def : Pat<(int_ppc_qpx_qvflogical v4f64:$A, v4f64:$B, (i32 imm:$idx)),
- (QVFLOGICAL $A, $B, imm:$idx)>;
-def : Pat<(int_ppc_qpx_qvgpci (u12:$idx)),
- (QVGPCI imm:$idx)>;
-
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOGE),
- (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
- (QVFTSTNANb $FRA, $FRB), (i32 8))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOLE),
- (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
- (QVFTSTNANb $FRA, $FRB), (i32 8))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETONE),
- (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
- (QVFTSTNANb $FRA, $FRB), (i32 8))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETO),
- (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
- (QVFTSTNANb $FRA, $FRB), (i32 10))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUEQ),
- (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
- (QVFTSTNANb $FRA, $FRB), (i32 7))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGT),
- (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
- (QVFTSTNANb $FRA, $FRB), (i32 7))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGE),
- (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
- (QVFCMPLTb $FRA, $FRB), (i32 13))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULT),
- (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
- (QVFTSTNANb $FRA, $FRB), (i32 7))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULE),
- (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
- (QVFCMPGTb $FRA, $FRB), (i32 13))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUNE),
- (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
- (QVFCMPEQb $FRA, $FRB), (i32 13))>;
-
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETEQ),
- (QVFCMPEQb $FRA, $FRB)>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGT),
- (QVFCMPGTb $FRA, $FRB)>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGE),
- (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
- (QVFCMPLTb $FRA, $FRB), (i32 10))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLT),
- (QVFCMPLTb $FRA, $FRB)>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLE),
- (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
- (QVFCMPGTb $FRA, $FRB), (i32 10))>;
-def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETNE),
- (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
- (QVFCMPEQb $FRA, $FRB), (i32 10))>;
-
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOGE),
- (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
- (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOLE),
- (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
- (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETONE),
- (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
- (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETO),
- (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
- (QVFTSTNANbs $FRA, $FRB), (i32 10))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUEQ),
- (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
- (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGT),
- (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
- (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGE),
- (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
- (QVFCMPLTbs $FRA, $FRB), (i32 13))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULT),
- (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
- (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULE),
- (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
- (QVFCMPGTbs $FRA, $FRB), (i32 13))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUNE),
- (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
- (QVFCMPEQbs $FRA, $FRB), (i32 13))>;
-
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETEQ),
- (QVFCMPEQbs $FRA, $FRB)>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGT),
- (QVFCMPGTbs $FRA, $FRB)>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGE),
- (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
- (QVFCMPLTbs $FRA, $FRB), (i32 10))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLT),
- (QVFCMPLTbs $FRA, $FRB)>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLE),
- (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
- (QVFCMPGTbs $FRA, $FRB), (i32 10))>;
-def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETNE),
- (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
- (QVFCMPEQbs $FRA, $FRB), (i32 10))>;
-
-def : Pat<(and v4i1:$FRA, (not v4i1:$FRB)),
- (QVFLOGICALb $FRA, $FRB, (i32 4))>;
-def : Pat<(not (or v4i1:$FRA, v4i1:$FRB)),
- (QVFLOGICALb $FRA, $FRB, (i32 8))>;
-def : Pat<(not (xor v4i1:$FRA, v4i1:$FRB)),
- (QVFLOGICALb $FRA, $FRB, (i32 9))>;
-def : Pat<(or v4i1:$FRA, (not v4i1:$FRB)),
- (QVFLOGICALb $FRA, $FRB, (i32 13))>;
-def : Pat<(not (and v4i1:$FRA, v4i1:$FRB)),
- (QVFLOGICALb $FRA, $FRB, (i32 14))>;
-
-def : Pat<(and v4i1:$FRA, v4i1:$FRB),
- (QVFLOGICALb $FRA, $FRB, (i32 1))>;
-def : Pat<(or v4i1:$FRA, v4i1:$FRB),
- (QVFLOGICALb $FRA, $FRB, (i32 7))>;
-def : Pat<(xor v4i1:$FRA, v4i1:$FRB),
- (QVFLOGICALb $FRA, $FRB, (i32 6))>;
-def : Pat<(not v4i1:$FRA),
- (QVFLOGICALb $FRA, $FRA, (i32 10))>;
-
-def : Pat<(v4f64 (fpextend v4f32:$src)),
- (COPY_TO_REGCLASS $src, QFRC)>;
-
-def : Pat<(v4f32 (fround_exact v4f64:$src)),
- (COPY_TO_REGCLASS $src, QSRC)>;
-
-// Extract the underlying floating-point values from the
-// QPX (-1.0, 1.0) boolean representation.
-def : Pat<(v4f64 (PPCqbflt v4i1:$src)),
- (COPY_TO_REGCLASS $src, QFRC)>;
-
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLT)),
- (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULT)),
- (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLE)),
- (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULE)),
- (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETEQ)),
- (SELECT_QFRC (CREQV $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGE)),
- (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGE)),
- (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGT)),
- (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGT)),
- (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETNE)),
- (SELECT_QFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLT)),
- (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULT)),
- (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLE)),
- (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULE)),
- (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETEQ)),
- (SELECT_QSRC (CREQV $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGE)),
- (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGE)),
- (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGT)),
- (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGT)),
- (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETNE)),
- (SELECT_QSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLT)),
- (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULT)),
- (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLE)),
- (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULE)),
- (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETEQ)),
- (SELECT_QBRC (CREQV $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGE)),
- (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGE)),
- (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGT)),
- (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGT)),
- (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETNE)),
- (SELECT_QBRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-
-} // end HasQPX
-
-let Predicates = [HasQPX, NoNaNsFPMath] in {
-def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
- (QVFSELb (QVFCMPLTb $FRA, $FRB), $FRB, $FRA)>;
-def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
- (QVFSELb (QVFCMPGTb $FRA, $FRB), $FRB, $FRA)>;
-
-def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
- (QVFSELbs (QVFCMPLTbs $FRA, $FRB), $FRB, $FRA)>;
-def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
- (QVFSELbs (QVFCMPGTbs $FRA, $FRB), $FRB, $FRA)>;
-}
-
-let Predicates = [HasQPX, NaNsFPMath] in {
-// When either of these operands is NaN, we should return the other operand.
-// QVFCMPLT/QVFCMPGT return false is either operand is NaN, which means we need
-// to explicitly or with a NaN test on the second operand.
-def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
- (QVFSELb (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
- (QVFTSTNANb $FRB, $FRB), (i32 7)),
- $FRB, $FRA)>;
-def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
- (QVFSELb (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
- (QVFTSTNANb $FRB, $FRB), (i32 7)),
- $FRB, $FRA)>;
-
-def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
- (QVFSELbs (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
- (QVFTSTNANbs $FRB, $FRB), (i32 7)),
- $FRB, $FRA)>;
-def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
- (QVFSELbs (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
- (QVFTSTNANbs $FRB, $FRB), (i32 7)),
- $FRB, $FRA)>;
-}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/llvm/lib/Target/PowerPC/PPCInstrSPE.td
index 858eb0c9fe50..299b34ca8283 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrSPE.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrSPE.td
@@ -820,16 +820,6 @@ def SPESTWX : XForm_8<31, 151, (outs), (ins spe4rc:$rS, memrr:$dst),
} // HasSPE
let Predicates = [HasSPE] in {
-def : Pat<(f64 (extloadf32 iaddr:$src)),
- (COPY_TO_REGCLASS (SPELWZ iaddr:$src), SPERC)>;
-def : Pat<(f64 (extloadf32 xaddr:$src)),
- (COPY_TO_REGCLASS (SPELWZX xaddr:$src), SPERC)>;
-
-def : Pat<(f64 (fpextend f32:$src)),
- (COPY_TO_REGCLASS $src, SPERC)>;
-}
-
-let Predicates = [HasSPE] in {
def SELECT_CC_SPE4 : PPCCustomInserterPseudo<(outs spe4rc:$dst),
(ins crrc:$cond, spe4rc:$T, spe4rc:$F,
i32imm:$BROPC), "#SELECT_CC_SPE4",
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 9ba5058a6f81..db6e00c71b89 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -145,6 +145,7 @@ def PPCSToV : SDNode<"PPCISD::SCALAR_TO_VECTOR_PERMUTED",
def HasVSX : Predicate<"Subtarget->hasVSX()">;
def IsLittleEndian : Predicate<"Subtarget->isLittleEndian()">;
def IsBigEndian : Predicate<"!Subtarget->isLittleEndian()">;
+def IsPPC64 : Predicate<"Subtarget->isPPC64()">;
def HasOnlySwappingMemOps : Predicate<"!Subtarget->hasP9Vector()">;
def HasP8Vector : Predicate<"Subtarget->hasP8Vector()">;
def HasDirectMove : Predicate<"Subtarget->hasDirectMove()">;
@@ -167,7 +168,7 @@ multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
def _rec : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
[(set InTy:$XT,
- (InTy (PPCvcmp_o InTy:$XA, InTy:$XB, xo)))]>,
+ (InTy (PPCvcmp_rec InTy:$XA, InTy:$XB, xo)))]>,
isRecordForm;
}
}
@@ -362,7 +363,8 @@ let hasSideEffects = 0 in {
}
} // mayStore
- let Uses = [RM], mayRaiseFPException = 1 in {
+ let mayRaiseFPException = 1 in {
+ let Uses = [RM] in {
// Add/Mul Instructions
let isCommutable = 1 in {
def XSADDDP : XX3Form<60, 32,
@@ -622,12 +624,30 @@ let hasSideEffects = 0 in {
"xsrsqrtedp $XT, $XB", IIC_VecFP,
[(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
+ let mayRaiseFPException = 0 in {
def XSTDIVDP : XX3Form_1<60, 61,
(outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
"xstdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
def XSTSQRTDP : XX2Form_1<60, 106,
(outs crrc:$crD), (ins vsfrc:$XB),
- "xstsqrtdp $crD, $XB", IIC_FPCompare, []>;
+ "xstsqrtdp $crD, $XB", IIC_FPCompare,
+ [(set i32:$crD, (PPCftsqrt f64:$XB))]>;
+ def XVTDIVDP : XX3Form_1<60, 125,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
+ def XVTDIVSP : XX3Form_1<60, 93,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>;
+
+ def XVTSQRTDP : XX2Form_1<60, 234,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xvtsqrtdp $crD, $XB", IIC_FPCompare,
+ [(set i32:$crD, (PPCftsqrt v2f64:$XB))]>;
+ def XVTSQRTSP : XX2Form_1<60, 170,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xvtsqrtsp $crD, $XB", IIC_FPCompare,
+ [(set i32:$crD, (PPCftsqrt v4f32:$XB))]>;
+ }
def XVDIVDP : XX3Form<60, 120,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
@@ -647,20 +667,6 @@ let hasSideEffects = 0 in {
"xvsqrtsp $XT, $XB", IIC_FPSqrtS,
[(set v4f32:$XT, (any_fsqrt v4f32:$XB))]>;
- def XVTDIVDP : XX3Form_1<60, 125,
- (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
- "xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
- def XVTDIVSP : XX3Form_1<60, 93,
- (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
- "xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>;
-
- def XVTSQRTDP : XX2Form_1<60, 234,
- (outs crrc:$crD), (ins vsrc:$XB),
- "xvtsqrtdp $crD, $XB", IIC_FPCompare, []>;
- def XVTSQRTSP : XX2Form_1<60, 170,
- (outs crrc:$crD), (ins vsrc:$XB),
- "xvtsqrtsp $crD, $XB", IIC_FPCompare, []>;
-
def XVREDP : XX2Form<60, 218,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvredp $XT, $XB", IIC_VecFP,
@@ -707,6 +713,7 @@ let hasSideEffects = 0 in {
int_ppc_vsx_xvcmpgtsp, v4i32, v4f32>;
// Move Instructions
+ let mayRaiseFPException = 0 in {
def XSABSDP : XX2Form<60, 345,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsabsdp $XT, $XB", IIC_VecFP,
@@ -760,6 +767,7 @@ let hasSideEffects = 0 in {
(outs vsrc:$XT), (ins vsrc:$XB),
"xvnegsp $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (fneg v4f32:$XB))]>;
+ }
// Conversion Instructions
def XSCVDPSP : XX2Form<60, 265,
@@ -768,50 +776,50 @@ let hasSideEffects = 0 in {
def XSCVDPSXDS : XX2Form<60, 344,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvdpsxds $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (PPCfctidz f64:$XB))]>;
+ [(set f64:$XT, (PPCany_fctidz f64:$XB))]>;
let isCodeGenOnly = 1 in
def XSCVDPSXDSs : XX2Form<60, 344,
(outs vssrc:$XT), (ins vssrc:$XB),
"xscvdpsxds $XT, $XB", IIC_VecFP,
- [(set f32:$XT, (PPCfctidz f32:$XB))]>;
+ [(set f32:$XT, (PPCany_fctidz f32:$XB))]>;
def XSCVDPSXWS : XX2Form<60, 88,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvdpsxws $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (PPCfctiwz f64:$XB))]>;
+ [(set f64:$XT, (PPCany_fctiwz f64:$XB))]>;
let isCodeGenOnly = 1 in
def XSCVDPSXWSs : XX2Form<60, 88,
(outs vssrc:$XT), (ins vssrc:$XB),
"xscvdpsxws $XT, $XB", IIC_VecFP,
- [(set f32:$XT, (PPCfctiwz f32:$XB))]>;
+ [(set f32:$XT, (PPCany_fctiwz f32:$XB))]>;
def XSCVDPUXDS : XX2Form<60, 328,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvdpuxds $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (PPCfctiduz f64:$XB))]>;
+ [(set f64:$XT, (PPCany_fctiduz f64:$XB))]>;
let isCodeGenOnly = 1 in
def XSCVDPUXDSs : XX2Form<60, 328,
(outs vssrc:$XT), (ins vssrc:$XB),
"xscvdpuxds $XT, $XB", IIC_VecFP,
- [(set f32:$XT, (PPCfctiduz f32:$XB))]>;
+ [(set f32:$XT, (PPCany_fctiduz f32:$XB))]>;
def XSCVDPUXWS : XX2Form<60, 72,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvdpuxws $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (PPCfctiwuz f64:$XB))]>;
+ [(set f64:$XT, (PPCany_fctiwuz f64:$XB))]>;
let isCodeGenOnly = 1 in
def XSCVDPUXWSs : XX2Form<60, 72,
(outs vssrc:$XT), (ins vssrc:$XB),
"xscvdpuxws $XT, $XB", IIC_VecFP,
- [(set f32:$XT, (PPCfctiwuz f32:$XB))]>;
+ [(set f32:$XT, (PPCany_fctiwuz f32:$XB))]>;
def XSCVSPDP : XX2Form<60, 329,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvspdp $XT, $XB", IIC_VecFP, []>;
def XSCVSXDDP : XX2Form<60, 376,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvsxddp $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (PPCfcfid f64:$XB))]>;
+ [(set f64:$XT, (PPCany_fcfid f64:$XB))]>;
def XSCVUXDDP : XX2Form<60, 360,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvuxddp $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (PPCfcfidu f64:$XB))]>;
+ [(set f64:$XT, (PPCany_fcfidu f64:$XB))]>;
def XVCVDPSP : XX2Form<60, 393,
(outs vsrc:$XT), (ins vsrc:$XB),
@@ -820,7 +828,7 @@ let hasSideEffects = 0 in {
def XVCVDPSXDS : XX2Form<60, 472,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvdpsxds $XT, $XB", IIC_VecFP,
- [(set v2i64:$XT, (fp_to_sint v2f64:$XB))]>;
+ [(set v2i64:$XT, (any_fp_to_sint v2f64:$XB))]>;
def XVCVDPSXWS : XX2Form<60, 216,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvdpsxws $XT, $XB", IIC_VecFP,
@@ -828,7 +836,7 @@ let hasSideEffects = 0 in {
def XVCVDPUXDS : XX2Form<60, 456,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvdpuxds $XT, $XB", IIC_VecFP,
- [(set v2i64:$XT, (fp_to_uint v2f64:$XB))]>;
+ [(set v2i64:$XT, (any_fp_to_uint v2f64:$XB))]>;
def XVCVDPUXWS : XX2Form<60, 200,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvdpuxws $XT, $XB", IIC_VecFP,
@@ -844,56 +852,105 @@ let hasSideEffects = 0 in {
def XVCVSPSXWS : XX2Form<60, 152,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvspsxws $XT, $XB", IIC_VecFP,
- [(set v4i32:$XT, (fp_to_sint v4f32:$XB))]>;
+ [(set v4i32:$XT, (any_fp_to_sint v4f32:$XB))]>;
def XVCVSPUXDS : XX2Form<60, 392,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvspuxds $XT, $XB", IIC_VecFP, []>;
def XVCVSPUXWS : XX2Form<60, 136,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvspuxws $XT, $XB", IIC_VecFP,
- [(set v4i32:$XT, (fp_to_uint v4f32:$XB))]>;
+ [(set v4i32:$XT, (any_fp_to_uint v4f32:$XB))]>;
def XVCVSXDDP : XX2Form<60, 504,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvsxddp $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (sint_to_fp v2i64:$XB))]>;
+ [(set v2f64:$XT, (any_sint_to_fp v2i64:$XB))]>;
def XVCVSXDSP : XX2Form<60, 440,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvsxdsp $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (int_ppc_vsx_xvcvsxdsp v2i64:$XB))]>;
- def XVCVSXWDP : XX2Form<60, 248,
- (outs vsrc:$XT), (ins vsrc:$XB),
- "xvcvsxwdp $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (int_ppc_vsx_xvcvsxwdp v4i32:$XB))]>;
def XVCVSXWSP : XX2Form<60, 184,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvsxwsp $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (sint_to_fp v4i32:$XB))]>;
+ [(set v4f32:$XT, (any_sint_to_fp v4i32:$XB))]>;
def XVCVUXDDP : XX2Form<60, 488,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvuxddp $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (uint_to_fp v2i64:$XB))]>;
+ [(set v2f64:$XT, (any_uint_to_fp v2i64:$XB))]>;
def XVCVUXDSP : XX2Form<60, 424,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvuxdsp $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (int_ppc_vsx_xvcvuxdsp v2i64:$XB))]>;
+ def XVCVUXWSP : XX2Form<60, 168,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxwsp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (any_uint_to_fp v4i32:$XB))]>;
+
+ let mayRaiseFPException = 0 in {
+ def XVCVSXWDP : XX2Form<60, 248,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxwdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (int_ppc_vsx_xvcvsxwdp v4i32:$XB))]>;
def XVCVUXWDP : XX2Form<60, 232,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvuxwdp $XT, $XB", IIC_VecFP,
[(set v2f64:$XT, (int_ppc_vsx_xvcvuxwdp v4i32:$XB))]>;
- def XVCVUXWSP : XX2Form<60, 168,
+ }
+
+ // Rounding Instructions respecting current rounding mode
+ def XSRDPIC : XX2Form<60, 107,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpic $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fnearbyint f64:$XB))]>;
+ def XVRDPIC : XX2Form<60, 235,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvcvuxwsp $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (uint_to_fp v4i32:$XB))]>;
+ "xvrdpic $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>;
+ def XVRSPIC : XX2Form<60, 171,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspic $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>;
+ // Max/Min Instructions
+ let isCommutable = 1 in {
+ def XSMAXDP : XX3Form<60, 160,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsmaxdp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsfrc:$XT,
+ (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>;
+ def XSMINDP : XX3Form<60, 168,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsmindp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsfrc:$XT,
+ (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>;
+
+ def XVMAXDP : XX3Form<60, 224,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmaxdp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>;
+ def XVMINDP : XX3Form<60, 232,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmindp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>;
+
+ def XVMAXSP : XX3Form<60, 192,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmaxsp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>;
+ def XVMINSP : XX3Form<60, 200,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvminsp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>;
+ } // isCommutable
+ } // Uses = [RM]
- // Rounding Instructions
+ // Rounding Instructions with static direction.
def XSRDPI : XX2Form<60, 73,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpi $XT, $XB", IIC_VecFP,
[(set f64:$XT, (any_fround f64:$XB))]>;
- def XSRDPIC : XX2Form<60, 107,
- (outs vsfrc:$XT), (ins vsfrc:$XB),
- "xsrdpic $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (any_fnearbyint f64:$XB))]>;
def XSRDPIM : XX2Form<60, 121,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpim $XT, $XB", IIC_VecFP,
@@ -911,10 +968,6 @@ let hasSideEffects = 0 in {
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpi $XT, $XB", IIC_VecFP,
[(set v2f64:$XT, (any_fround v2f64:$XB))]>;
- def XVRDPIC : XX2Form<60, 235,
- (outs vsrc:$XT), (ins vsrc:$XB),
- "xvrdpic $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (any_fnearbyint v2f64:$XB))]>;
def XVRDPIM : XX2Form<60, 249,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpim $XT, $XB", IIC_VecFP,
@@ -932,10 +985,6 @@ let hasSideEffects = 0 in {
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspi $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (any_fround v4f32:$XB))]>;
- def XVRSPIC : XX2Form<60, 171,
- (outs vsrc:$XT), (ins vsrc:$XB),
- "xvrspic $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (any_fnearbyint v4f32:$XB))]>;
def XVRSPIM : XX2Form<60, 185,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspim $XT, $XB", IIC_VecFP,
@@ -948,43 +997,7 @@ let hasSideEffects = 0 in {
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspiz $XT, $XB", IIC_VecFP,
[(set v4f32:$XT, (any_ftrunc v4f32:$XB))]>;
-
- // Max/Min Instructions
- let isCommutable = 1 in {
- def XSMAXDP : XX3Form<60, 160,
- (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
- "xsmaxdp $XT, $XA, $XB", IIC_VecFP,
- [(set vsfrc:$XT,
- (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>;
- def XSMINDP : XX3Form<60, 168,
- (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
- "xsmindp $XT, $XA, $XB", IIC_VecFP,
- [(set vsfrc:$XT,
- (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>;
-
- def XVMAXDP : XX3Form<60, 224,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvmaxdp $XT, $XA, $XB", IIC_VecFP,
- [(set vsrc:$XT,
- (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>;
- def XVMINDP : XX3Form<60, 232,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvmindp $XT, $XA, $XB", IIC_VecFP,
- [(set vsrc:$XT,
- (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>;
-
- def XVMAXSP : XX3Form<60, 192,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvmaxsp $XT, $XA, $XB", IIC_VecFP,
- [(set vsrc:$XT,
- (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>;
- def XVMINSP : XX3Form<60, 200,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvminsp $XT, $XA, $XB", IIC_VecFP,
- [(set vsrc:$XT,
- (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>;
- } // isCommutable
- } // Uses = [RM], mayRaiseFPException
+ } // mayRaiseFPException
// Logical Instructions
let isCommutable = 1 in
@@ -1170,7 +1183,7 @@ let Predicates = [HasVSX, HasP8Vector] in {
"xsresp $XT, $XB", IIC_VecFP,
[(set f32:$XT, (PPCfre f32:$XB))]>;
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
- let hasSideEffects = 1, mayRaiseFPException = 1 in
+ let hasSideEffects = 1 in
def XSRSP : XX2Form<60, 281,
(outs vssrc:$XT), (ins vsfrc:$XB),
"xsrsp $XT, $XB", IIC_VecFP,
@@ -1268,18 +1281,18 @@ let Predicates = [HasVSX, HasP8Vector] in {
def XSCVSXDSP : XX2Form<60, 312,
(outs vssrc:$XT), (ins vsfrc:$XB),
"xscvsxdsp $XT, $XB", IIC_VecFP,
- [(set f32:$XT, (PPCfcfids f64:$XB))]>;
+ [(set f32:$XT, (PPCany_fcfids f64:$XB))]>;
def XSCVUXDSP : XX2Form<60, 296,
(outs vssrc:$XT), (ins vsfrc:$XB),
"xscvuxdsp $XT, $XB", IIC_VecFP,
- [(set f32:$XT, (PPCfcfidus f64:$XB))]>;
+ [(set f32:$XT, (PPCany_fcfidus f64:$XB))]>;
+ } // mayRaiseFPException
// Conversions between vector and scalar single precision
def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB),
"xscvdpspn $XT, $XB", IIC_VecFP, []>;
def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
"xscvspdpn $XT, $XB", IIC_VecFP, []>;
- } // mayRaiseFPException
let Predicates = [HasVSX, HasDirectMove] in {
// VSX direct move instructions
@@ -1440,15 +1453,16 @@ let Predicates = [HasVSX, HasP9Vector] in {
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
// QP Compare Ordered/Unordered
let hasSideEffects = 1 in {
- def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>;
- def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>;
-
// DP/QP Compare Exponents
def XSCMPEXPDP : XX3Form_1<60, 59,
(outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
"xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
+ let mayRaiseFPException = 1 in {
+ def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>;
+ def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>;
+
// DP Compare ==, >=, >, !=
// Use vsrc for XT, because the entire register of XT is set.
// XT.dword[1] = 0x0000_0000_0000_0000
@@ -1458,6 +1472,7 @@ let Predicates = [HasVSX, HasP9Vector] in {
IIC_FPCompare, []>;
def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc,
IIC_FPCompare, []>;
+ }
}
//===--------------------------------------------------------------------===//
@@ -1476,9 +1491,8 @@ let Predicates = [HasVSX, HasP9Vector] in {
f128:$vB))]>;
}
- // FIXME: Setting the hasSideEffects flag here to match current behaviour.
// Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero)
- let hasSideEffects = 1 in {
+ let mayRaiseFPException = 1 in {
def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>;
def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz", []>;
def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>;
@@ -1494,11 +1508,12 @@ let Predicates = [HasVSX, HasP9Vector] in {
// vsfrc for src and dest register. xscvhpdp's src only use the left 16 bits,
// but we still use vsfrc for it.
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
- let hasSideEffects = 1 in {
+ let hasSideEffects = 1, mayRaiseFPException = 1 in {
def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>;
def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>;
}
+ let mayRaiseFPException = 1 in {
// Vector HP -> SP
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
let hasSideEffects = 1 in
@@ -1507,16 +1522,15 @@ let Predicates = [HasVSX, HasP9Vector] in {
[(set v4f32:$XT,
(int_ppc_vsx_xvcvsphp v4f32:$XB))]>;
- let mayRaiseFPException = 1 in {
- // Round to Quad-Precision Integer [with Inexact]
- def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>;
- def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>;
- }
+ // Round to Quad-Precision Integer [with Inexact]
+ def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>;
+ def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>;
// Round Quad-Precision to Double-Extended Precision (fp80)
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
let hasSideEffects = 1 in
def XSRQPXP : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>;
+ }
//===--------------------------------------------------------------------===//
// Insert/Extract Instructions
@@ -1607,6 +1621,7 @@ let Predicates = [HasVSX, HasP9Vector] in {
(int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>;
// Maximum/Minimum Type-C/Type-J DP
+ let mayRaiseFPException = 1 in {
def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsfrc, vsfrc, vsfrc,
IIC_VecFP,
[(set f64:$XT, (PPCxsmaxc f64:$XA, f64:$XB))]>;
@@ -1621,6 +1636,7 @@ let Predicates = [HasVSX, HasP9Vector] in {
def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc,
IIC_VecFP, []>;
}
+ }
// Vector Byte-Reverse H/W/D/Q Word
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
@@ -2392,33 +2408,48 @@ def MrgWords {
// arbitrarily chosen to be Big, Little.
//
// Predicate combinations available:
+// [HasVSX, IsLittleEndian, HasP8Altivec] Altivec patterns using VSX instr.
+// [HasVSX, IsBigEndian, HasP8Altivec] Altivec patterns using VSX instr.
// [HasVSX]
// [HasVSX, IsBigEndian]
// [HasVSX, IsLittleEndian]
// [HasVSX, NoP9Vector]
+// [HasVSX, NoP9Vector, IsLittleEndian]
// [HasVSX, HasOnlySwappingMemOps]
// [HasVSX, HasOnlySwappingMemOps, IsBigEndian]
// [HasVSX, HasP8Vector]
-// [HasVSX, HasP8Vector, IsBigEndian]
+// [HasVSX, HasP8Vector, IsBigEndian, IsPPC64]
// [HasVSX, HasP8Vector, IsLittleEndian]
-// [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian]
+// [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64]
// [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian]
// [HasVSX, HasDirectMove]
// [HasVSX, HasDirectMove, IsBigEndian]
// [HasVSX, HasDirectMove, IsLittleEndian]
-// [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian]
+// [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian, IsPPC64]
+// [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64]
// [HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian]
-// [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian]
// [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian]
// [HasVSX, HasP9Vector]
-// [HasVSX, HasP9Vector, IsBigEndian]
+// [HasVSX, HasP9Vector, IsBigEndian, IsPPC64]
// [HasVSX, HasP9Vector, IsLittleEndian]
// [HasVSX, HasP9Altivec]
-// [HasVSX, HasP9Altivec, IsBigEndian]
+// [HasVSX, HasP9Altivec, IsBigEndian, IsPPC64]
// [HasVSX, HasP9Altivec, IsLittleEndian]
-// [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian]
+// [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64]
// [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian]
+// These Altivec patterns are here because we need a VSX instruction to match
+// the intrinsic (but only for little endian system).
+let Predicates = [HasVSX, IsLittleEndian, HasP8Altivec] in
+ def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
+ v16i8:$b, v16i8:$c)),
+ (v16i8 (VPERMXOR $a, $b, (XXLNOR (COPY_TO_REGCLASS $c, VSRC),
+ (COPY_TO_REGCLASS $c, VSRC))))>;
+let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in
+ def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
+ v16i8:$b, v16i8:$c)),
+ (v16i8 (VPERMXOR $a, $b, $c))>;
+
let AddedComplexity = 400 in {
// Valid for any VSX subtarget, regardless of endianness.
let Predicates = [HasVSX] in {
@@ -2450,6 +2481,10 @@ def : Pat<(fneg (PPCfnmsub v4f32:$A, v4f32:$B, v4f32:$C)),
def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, (fneg v4f32:$C)),
(XVNMADDASP $C, $A, $B)>;
+def : Pat<(PPCfsqrt f64:$frA), (XSSQRTDP $frA)>;
+def : Pat<(PPCfsqrt v2f64:$frA), (XVSQRTDP $frA)>;
+def : Pat<(PPCfsqrt v4f32:$frA), (XVSQRTSP $frA)>;
+
def : Pat<(v2f64 (bitconvert v4f32:$A)),
(COPY_TO_REGCLASS $A, VSRC)>;
def : Pat<(v2f64 (bitconvert v4i32:$A)),
@@ -2579,6 +2614,16 @@ def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
(XVDIVDP $A, $B)>;
+// Vector test for software divide and sqrt.
+def : Pat<(i32 (int_ppc_vsx_xvtdivdp v2f64:$A, v2f64:$B)),
+ (COPY_TO_REGCLASS (XVTDIVDP $A, $B), GPRC)>;
+def : Pat<(i32 (int_ppc_vsx_xvtdivsp v4f32:$A, v4f32:$B)),
+ (COPY_TO_REGCLASS (XVTDIVSP $A, $B), GPRC)>;
+def : Pat<(i32 (int_ppc_vsx_xvtsqrtdp v2f64:$A)),
+ (COPY_TO_REGCLASS (XVTSQRTDP $A), GPRC)>;
+def : Pat<(i32 (int_ppc_vsx_xvtsqrtsp v4f32:$A)),
+ (COPY_TO_REGCLASS (XVTSQRTSP $A), GPRC)>;
+
// Reciprocal estimate
def : Pat<(int_ppc_vsx_xvresp v4f32:$A),
(XVRESP $A)>;
@@ -2679,7 +2724,7 @@ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
def : Pat<(f32 (any_fround f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPI
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (any_fnearbyint f32:$S)),
+def : Pat<(f32 (fnearbyint f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPIC
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
def : Pat<(f32 (any_ffloor f32:$S)),
@@ -2694,11 +2739,11 @@ def : Pat<(f32 (any_ftrunc f32:$S)),
def : Pat<(f32 (any_frint f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPIC
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(v4f32 (frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
+def : Pat<(v4f32 (any_frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
// Rounding for double precision.
-def : Pat<(f64 (frint f64:$S)), (f64 (XSRDPIC $S))>;
-def : Pat<(v2f64 (frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
+def : Pat<(f64 (any_frint f64:$S)), (f64 (XSRDPIC $S))>;
+def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
// Materialize a zero-vector of long long
def : Pat<(v2i64 immAllZerosV),
@@ -2975,6 +3020,19 @@ defm : ScalToVecWPermute<
VSFRC)), sub_64)>;
} // HasVSX, NoP9Vector
+// Any little endian pre-Power9 VSX subtarget.
+let Predicates = [HasVSX, NoP9Vector, IsLittleEndian] in {
+// Load-and-splat using only X-Form VSX loads.
+defm : ScalToVecWPermute<
+ v2i64, (i64 (load xoaddr:$src)),
+ (XXPERMDIs (XFLOADf64 xoaddr:$src), 2),
+ (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>;
+defm : ScalToVecWPermute<
+ v2f64, (f64 (load xoaddr:$src)),
+ (XXPERMDIs (XFLOADf64 xoaddr:$src), 2),
+ (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>;
+} // HasVSX, NoP9Vector, IsLittleEndian
+
// Any VSX subtarget that only has loads and stores that load in big endian
// order regardless of endianness. This is really pre-Power9 subtargets.
let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
@@ -2986,8 +3044,8 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
} // HasVSX, HasOnlySwappingMemOps
-// Big endian VSX subtarget that only has loads and stores that always load
-// in big endian order. Really big endian pre-Power9 subtargets.
+// Big endian VSX subtarget that only has loads and stores that always
+// load in big endian order. Really big endian pre-Power9 subtargets.
let Predicates = [HasVSX, HasOnlySwappingMemOps, IsBigEndian] in {
def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
@@ -3080,7 +3138,7 @@ def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))),
} // HasVSX, HasP8Vector
// Big endian Power8 VSX subtarget.
-let Predicates = [HasVSX, HasP8Vector, IsBigEndian] in {
+let Predicates = [HasVSX, HasP8Vector, IsBigEndian, IsPPC64] in {
def : Pat<DWToSPExtractConv.El0SS1,
(f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
def : Pat<DWToSPExtractConv.El1SS1,
@@ -3158,7 +3216,7 @@ foreach Idx = [ [0,3], [2,1], [3,2] ] in {
(STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
sub_64), xoaddr:$src)>;
}
-} // HasVSX, HasP8Vector, IsBigEndian
+} // HasVSX, HasP8Vector, IsBigEndian, IsPPC64
// Little endian Power8 VSX subtarget.
let Predicates = [HasVSX, HasP8Vector, IsLittleEndian] in {
@@ -3257,7 +3315,7 @@ foreach Idx = [ [0,2], [1,1], [3,3] ] in {
} // HasVSX, HasP8Vector, IsLittleEndian
// Big endian pre-Power9 VSX subtarget.
-let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian] in {
+let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64] in {
def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
(XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
@@ -3268,7 +3326,7 @@ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
(XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
xoaddr:$src)>;
-} // HasVSX, HasP8Vector, NoP9Vector, IsBigEndian
+} // HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64
// Little endian pre-Power9 VSX subtarget.
let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian] in {
@@ -3525,8 +3583,8 @@ def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
(i32 VectorExtractions.LE_VARIABLE_WORD)>;
} // HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian
-// Big endian pre-Power9 VSX subtarget that has direct moves.
-let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian] in {
+// Big endian pre-Power9 64Bit VSX subtarget that has direct moves.
+let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64] in {
// Big endian integer vectors using direct moves.
def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
(v2i64 (XXPERMDI
@@ -3540,7 +3598,7 @@ def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
(MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
(XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
-} // HasVSX, HasDirectMove, NoP9Vector, IsBigEndian
+} // HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64
// Little endian pre-Power9 VSX subtarget that has direct moves.
let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian] in {
@@ -3569,25 +3627,25 @@ def : Pat<(fneg (PPCfnmsub f128:$A, f128:$B, f128:$C)),
def : Pat<(PPCfnmsub f128:$A, f128:$B, (fneg f128:$C)),
(XSNMADDQP $C, $A, $B)>;
-def : Pat<(f128 (sint_to_fp i64:$src)),
+def : Pat<(f128 (any_sint_to_fp i64:$src)),
(f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
-def : Pat<(f128 (sint_to_fp (i64 (PPCmfvsr f64:$src)))),
+def : Pat<(f128 (any_sint_to_fp (i64 (PPCmfvsr f64:$src)))),
(f128 (XSCVSDQP $src))>;
-def : Pat<(f128 (sint_to_fp (i32 (PPCmfvsr f64:$src)))),
+def : Pat<(f128 (any_sint_to_fp (i32 (PPCmfvsr f64:$src)))),
(f128 (XSCVSDQP (VEXTSW2Ds $src)))>;
-def : Pat<(f128 (uint_to_fp i64:$src)),
+def : Pat<(f128 (any_uint_to_fp i64:$src)),
(f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
-def : Pat<(f128 (uint_to_fp (i64 (PPCmfvsr f64:$src)))),
+def : Pat<(f128 (any_uint_to_fp (i64 (PPCmfvsr f64:$src)))),
(f128 (XSCVUDQP $src))>;
// Convert (Un)Signed Word -> QP.
-def : Pat<(f128 (sint_to_fp i32:$src)),
+def : Pat<(f128 (any_sint_to_fp i32:$src)),
(f128 (XSCVSDQP (MTVSRWA $src)))>;
-def : Pat<(f128 (sint_to_fp (i32 (load xoaddr:$src)))),
+def : Pat<(f128 (any_sint_to_fp (i32 (load xoaddr:$src)))),
(f128 (XSCVSDQP (LIWAX xoaddr:$src)))>;
-def : Pat<(f128 (uint_to_fp i32:$src)),
+def : Pat<(f128 (any_uint_to_fp i32:$src)),
(f128 (XSCVUDQP (MTVSRWZ $src)))>;
-def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))),
+def : Pat<(f128 (any_uint_to_fp (i32 (load xoaddr:$src)))),
(f128 (XSCVUDQP (LIWZX xoaddr:$src)))>;
// Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
@@ -3761,11 +3819,11 @@ def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)),
(f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>;
// Truncate & Convert QP -> (Un)Signed (D)Word.
-def : Pat<(i64 (fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>;
-def : Pat<(i64 (fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>;
-def : Pat<(i32 (fp_to_sint f128:$src)),
+def : Pat<(i64 (any_fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>;
+def : Pat<(i64 (any_fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>;
+def : Pat<(i32 (any_fp_to_sint f128:$src)),
(i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>;
-def : Pat<(i32 (fp_to_uint f128:$src)),
+def : Pat<(i32 (any_fp_to_uint f128:$src)),
(i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>;
// Instructions for store(fptosi).
@@ -3893,8 +3951,8 @@ def : Pat<(v4i32 (PPCldsplat xoaddr:$A)),
(v4i32 (LXVWSX xoaddr:$A))>;
} // HasVSX, HasP9Vector
-// Big endian Power9 subtarget.
-let Predicates = [HasVSX, HasP9Vector, IsBigEndian] in {
+// Big endian 64Bit Power9 subtarget.
+let Predicates = [HasVSX, HasP9Vector, IsBigEndian, IsPPC64] in {
def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
@@ -4067,7 +4125,7 @@ foreach Idx = 0-15 in {
def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
(f128 (XSCVUDQP
(XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 4)))>;
-} // HasVSX, HasP9Vector, IsBigEndian
+} // HasVSX, HasP9Vector, IsBigEndian, IsPPC64
// Little endian Power9 subtarget.
let Predicates = [HasVSX, HasP9Vector, IsLittleEndian] in {
@@ -4292,8 +4350,8 @@ def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))),
(v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
} // HasVSX, HasP9Altivec
-// Big endian Power9 VSX subtargets with P9 Altivec support.
-let Predicates = [HasVSX, HasP9Altivec, IsBigEndian] in {
+// Big endian Power9 64Bit VSX subtargets with P9 Altivec support.
+let Predicates = [HasVSX, HasP9Altivec, IsBigEndian, IsPPC64] in {
def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
(VEXTUBLX $Idx, $S)>;
@@ -4426,7 +4484,7 @@ def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1,
(v4i32 (VEXTSB2W $A))>;
def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
(v2i64 (VEXTSB2D $A))>;
-} // HasVSX, HasP9Altivec, IsBigEndian
+} // HasVSX, HasP9Altivec, IsBigEndian, IsPPC64
// Little endian Power9 VSX subtargets with P9 Altivec support.
let Predicates = [HasVSX, HasP9Altivec, IsLittleEndian] in {
@@ -4563,8 +4621,9 @@ def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
(v2i64 (VEXTSB2D $A))>;
} // HasVSX, HasP9Altivec, IsLittleEndian
-// Big endian VSX subtarget that supports additional direct moves from ISA3.0.
-let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian] in {
+// Big endian 64Bit VSX subtarget that supports additional direct moves from
+// ISA3.0.
+let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64] in {
def : Pat<(i64 (extractelt v2i64:$A, 1)),
(i64 (MFVSRLD $A))>;
// Better way to build integer vectors if we have MTVSRDD. Big endian.
@@ -4577,7 +4636,7 @@ def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)),
(f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
-} // HasVSX, IsISA3_0, HasDirectMove, IsBigEndian
+} // HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64
// Little endian VSX subtarget that supports direct moves from ISA3.0.
let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian] in {
@@ -4602,20 +4661,24 @@ def : InstAlias<"xvmovdp $XT, $XB",
def : InstAlias<"xvmovsp $XT, $XB",
(XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
-def : InstAlias<"xxspltd $XT, $XB, 0",
- (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
-def : InstAlias<"xxspltd $XT, $XB, 1",
- (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
+// Certain versions of the AIX assembler may missassemble these mnemonics.
+let Predicates = [ModernAs] in {
+ def : InstAlias<"xxspltd $XT, $XB, 0",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
+ def : InstAlias<"xxspltd $XT, $XB, 1",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
+ def : InstAlias<"xxspltd $XT, $XB, 0",
+ (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>;
+ def : InstAlias<"xxspltd $XT, $XB, 1",
+ (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>;
+}
+
def : InstAlias<"xxmrghd $XT, $XA, $XB",
(XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>;
def : InstAlias<"xxmrgld $XT, $XA, $XB",
(XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
def : InstAlias<"xxswapd $XT, $XB",
(XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
-def : InstAlias<"xxspltd $XT, $XB, 0",
- (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>;
-def : InstAlias<"xxspltd $XT, $XB, 1",
- (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>;
def : InstAlias<"xxswapd $XT, $XB",
(XXPERMDIs vsrc:$XT, vsfrc:$XB, 2)>;
def : InstAlias<"mfvrd $rA, $XT",
diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index a7546d2be5d8..c24240909797 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -60,6 +60,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
@@ -80,10 +81,8 @@
using namespace llvm;
-// By default, we limit this to creating 16 common bases out of loops per
-// function. 16 is a little over half of the allocatable register set.
static cl::opt<unsigned> MaxVarsPrep("ppc-formprep-max-vars",
- cl::Hidden, cl::init(16),
+ cl::Hidden, cl::init(24),
cl::desc("Potential common base number threshold per function for PPC loop "
"prep"));
@@ -93,8 +92,7 @@ static cl::opt<bool> PreferUpdateForm("ppc-formprep-prefer-update",
// Sum of following 3 per loop thresholds for all loops can not be larger
// than MaxVarsPrep.
-// By default, we limit this to creating 9 PHIs for one loop.
-// 9 and 3 for each kind prep are exterimental values on Power9.
+// now the thresholds for each kind prep are exterimental values on Power9.
static cl::opt<unsigned> MaxVarsUpdateForm("ppc-preinc-prep-max-vars",
cl::Hidden, cl::init(3),
cl::desc("Potential PHI threshold per loop for PPC loop prep of update "
@@ -105,7 +103,7 @@ static cl::opt<unsigned> MaxVarsDSForm("ppc-dsprep-max-vars",
cl::desc("Potential PHI threshold per loop for PPC loop prep of DS form"));
static cl::opt<unsigned> MaxVarsDQForm("ppc-dqprep-max-vars",
- cl::Hidden, cl::init(3),
+ cl::Hidden, cl::init(8),
cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form"));
@@ -277,8 +275,11 @@ static Value *GetPointerOperand(Value *MemI) {
} else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) {
return SMemI->getPointerOperand();
} else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
- if (IMemI->getIntrinsicID() == Intrinsic::prefetch)
+ if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||
+ IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp)
return IMemI->getArgOperand(0);
+ if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp)
+ return IMemI->getArgOperand(1);
}
return nullptr;
@@ -345,9 +346,13 @@ SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(
MemI = SMemI;
PtrValue = SMemI->getPointerOperand();
} else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(&J)) {
- if (IMemI->getIntrinsicID() == Intrinsic::prefetch) {
+ if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||
+ IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) {
MemI = IMemI;
PtrValue = IMemI->getArgOperand(0);
+ } else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) {
+ MemI = IMemI;
+ PtrValue = IMemI->getArgOperand(1);
} else continue;
} else continue;
@@ -606,6 +611,10 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
NewBasePtr = NewPHI;
}
+ // Clear the rewriter cache, because values that are in the rewriter's cache
+ // can be deleted below, causing the AssertingVH in the cache to trigger.
+ SCEVE.clear();
+
if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
BBChanged.insert(IDel->getParent());
BasePtr->replaceAllUsesWith(NewBasePtr);
@@ -791,7 +800,7 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
bool MadeChange = false;
// Only prep. the inner-most loop
- if (!L->empty())
+ if (!L->isInnermost())
return MadeChange;
// Return if already done enough preparation.
@@ -823,6 +832,11 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
if (ST && ST->hasAltivec() &&
PtrValue->getType()->getPointerElementType()->isVectorTy())
return false;
+ // There are no update forms for P10 lxvp/stxvp intrinsic.
+ auto *II = dyn_cast<IntrinsicInst>(I);
+ if (II && ((II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) ||
+ II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp))
+ return false;
// See getPreIndexedAddressParts, the displacement for LDU/STDU has to
// be 4's multiple (DS-form). For i64 loads/stores when the displacement
// fits in a 16-bit signed field but isn't a multiple of 4, it will be
@@ -860,7 +874,13 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
// Check if a load/store has DQ form.
auto isDQFormCandidate = [&] (const Instruction *I, const Value *PtrValue) {
assert((PtrValue && I) && "Invalid parameter!");
- return !isa<IntrinsicInst>(I) && ST && ST->hasP9Vector() &&
+ // Check if it is a P10 lxvp/stxvp intrinsic.
+ auto *II = dyn_cast<IntrinsicInst>(I);
+ if (II)
+ return II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp ||
+ II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp;
+ // Check if it is a P9 vector load/store.
+ return ST && ST->hasP9Vector() &&
(PtrValue->getType()->getPointerElementType()->isVectorTy());
};
diff --git a/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp b/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
index 2b0e604e0ccd..27b2c9a628d0 100644
--- a/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
@@ -16,6 +16,7 @@
#include "PPC.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Instructions.h"
@@ -64,8 +65,7 @@ private:
/// Checks if the specified function name represents an entry in the MASSV
/// library.
bool PPCLowerMASSVEntries::isMASSVFunc(StringRef Name) {
- auto Iter = std::find(std::begin(MASSVFuncs), std::end(MASSVFuncs), Name);
- return Iter != std::end(MASSVFuncs);
+ return llvm::is_contained(MASSVFuncs, Name);
}
// FIXME:
@@ -105,7 +105,7 @@ bool PPCLowerMASSVEntries::handlePowSpecialCases(CallInst *CI, Function &Func,
return false;
if (Constant *Exp = dyn_cast<Constant>(CI->getArgOperand(1)))
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(Exp->getSplatValue())) {
+ if (ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(Exp->getSplatValue())) {
// If the argument is 0.75 or 0.25 it is cheaper to turn it into pow
// intrinsic so that it could be optimzed as sequence of sqrt's.
if (!CI->hasNoInfs() || !CI->hasApproxFunc())
diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 236f98f32e18..5cc180d770b2 100644
--- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -74,7 +74,9 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
RefKind = MCSymbolRefExpr::VK_PPC_TOC_LO;
break;
case PPCII::MO_TLS:
- RefKind = MCSymbolRefExpr::VK_PPC_TLS;
+ bool IsPCRel = (MO.getTargetFlags() & ~access) == PPCII::MO_PCREL_FLAG;
+ RefKind = IsPCRel ? MCSymbolRefExpr::VK_PPC_TLS_PCREL
+ : MCSymbolRefExpr::VK_PPC_TLS;
break;
}
@@ -84,6 +86,14 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
RefKind = MCSymbolRefExpr::VK_PCREL;
else if (MO.getTargetFlags() == (PPCII::MO_PCREL_FLAG | PPCII::MO_GOT_FLAG))
RefKind = MCSymbolRefExpr::VK_PPC_GOT_PCREL;
+ else if (MO.getTargetFlags() == (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG))
+ RefKind = MCSymbolRefExpr::VK_TPREL;
+ else if (MO.getTargetFlags() == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
+ RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL;
+ else if (MO.getTargetFlags() == PPCII::MO_GOT_TLSLD_PCREL_FLAG)
+ RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL;
+ else if (MO.getTargetFlags() == PPCII::MO_GOT_TPREL_PCREL_FLAG)
+ RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL;
const MachineInstr *MI = MO.getParent();
const MachineFunction *MF = MI->getMF();
@@ -100,6 +110,8 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
MIOpcode == PPC::BL8_NOTOC) {
RefKind = MCSymbolRefExpr::VK_PPC_NOTOC;
}
+ if (MO.getTargetFlags() == PPCII::MO_PCREL_OPT_FLAG)
+ RefKind = MCSymbolRefExpr::VK_PPC_PCREL_OPT;
}
const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx);
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index d2aba6bd6e8d..c8b01aaef828 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -267,6 +267,113 @@ void PPCMIPeephole::UpdateTOCSaves(
TOCSaves[MI] = Keep;
}
+// This function returns a list of all PHI nodes in the tree starting from
+// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
+// The list initially only contains the root PHI. When we visit a PHI node, we
+// add it to the list. We continue to look for other PHI node operands while
+// there are nodes to visit in the list. The function returns false if the
+// optimization cannot be applied on this tree.
+static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
+ MachineInstr *RootPHI,
+ SmallVectorImpl<MachineInstr *> &PHIs) {
+ PHIs.push_back(RootPHI);
+ unsigned VisitedIndex = 0;
+ while (VisitedIndex < PHIs.size()) {
+ MachineInstr *VisitedPHI = PHIs[VisitedIndex];
+ for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
+ PHIOp != NumOps; PHIOp += 2) {
+ Register RegOp = VisitedPHI->getOperand(PHIOp).getReg();
+ if (!Register::isVirtualRegister(RegOp))
+ return false;
+ MachineInstr *Instr = MRI->getVRegDef(RegOp);
+ // While collecting the PHI nodes, we check if they can be converted (i.e.
+ // all the operands are either copies, implicit defs or PHI nodes).
+ unsigned Opcode = Instr->getOpcode();
+ if (Opcode == PPC::COPY) {
+ Register Reg = Instr->getOperand(1).getReg();
+ if (!Register::isVirtualRegister(Reg) ||
+ MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
+ return false;
+ } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
+ return false;
+ // If we detect a cycle in the PHI nodes, we exit. It would be
+ // possible to change cycles as well, but that would add a lot
+ // of complexity for a case that is unlikely to occur with MMA
+ // code.
+ if (Opcode != PPC::PHI)
+ continue;
+ if (llvm::is_contained(PHIs, Instr))
+ return false;
+ PHIs.push_back(Instr);
+ }
+ VisitedIndex++;
+ }
+ return true;
+}
+
+// This function changes the unprimed accumulator PHI nodes in the PHIs list to
+// primed accumulator PHI nodes. The list is traversed in reverse order to
+// change all the PHI operands of a PHI node before changing the node itself.
+// We keep a map to associate each changed PHI node to its non-changed form.
+static void convertUnprimedAccPHIs(const PPCInstrInfo *TII,
+ MachineRegisterInfo *MRI,
+ SmallVectorImpl<MachineInstr *> &PHIs,
+ Register Dst) {
+ DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap;
+ for (auto It = PHIs.rbegin(), End = PHIs.rend(); It != End; ++It) {
+ MachineInstr *PHI = *It;
+ SmallVector<std::pair<MachineOperand, MachineOperand>, 4> PHIOps;
+ // We check if the current PHI node can be changed by looking at its
+ // operands. If all the operands are either copies from primed
+ // accumulators, implicit definitions or other unprimed accumulator
+ // PHI nodes, we change it.
+ for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
+ PHIOp += 2) {
+ Register RegOp = PHI->getOperand(PHIOp).getReg();
+ MachineInstr *PHIInput = MRI->getVRegDef(RegOp);
+ unsigned Opcode = PHIInput->getOpcode();
+ assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
+ Opcode == PPC::PHI) &&
+ "Unexpected instruction");
+ if (Opcode == PPC::COPY) {
+ assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
+ &PPC::ACCRCRegClass &&
+ "Unexpected register class");
+ PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)});
+ } else if (Opcode == PPC::IMPLICIT_DEF) {
+ Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
+ BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(),
+ TII->get(PPC::IMPLICIT_DEF), AccReg);
+ PHIOps.push_back({MachineOperand::CreateReg(AccReg, false),
+ PHI->getOperand(PHIOp + 1)});
+ } else if (Opcode == PPC::PHI) {
+ // We found a PHI operand. At this point we know this operand
+ // has already been changed so we get its associated changed form
+ // from the map.
+ assert(ChangedPHIMap.count(PHIInput) == 1 &&
+ "This PHI node should have already been changed.");
+ MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput);
+ PHIOps.push_back({MachineOperand::CreateReg(
+ PrimedAccPHI->getOperand(0).getReg(), false),
+ PHI->getOperand(PHIOp + 1)});
+ }
+ }
+ Register AccReg = Dst;
+ // If the PHI node we are changing is the root node, the register it defines
+ // will be the destination register of the original copy (of the PHI def).
+ // For all other PHI's in the list, we need to create another primed
+ // accumulator virtual register as the PHI will no longer define the
+ // unprimed accumulator.
+ if (PHI != PHIs[0])
+ AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
+ MachineInstrBuilder NewPHI = BuildMI(
+ *PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg);
+ for (auto RegMBB : PHIOps)
+ NewPHI.add(RegMBB.first).add(RegMBB.second);
+ ChangedPHIMap[PHI] = NewPHI.getInstr();
+ }
+}
+
// Perform peephole optimizations.
bool PPCMIPeephole::simplifyCode(void) {
bool Simplified = false;
@@ -321,6 +428,38 @@ bool PPCMIPeephole::simplifyCode(void) {
default:
break;
+ case PPC::COPY: {
+ Register Src = MI.getOperand(1).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ if (!Register::isVirtualRegister(Src) ||
+ !Register::isVirtualRegister(Dst))
+ break;
+ if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass ||
+ MRI->getRegClass(Dst) != &PPC::ACCRCRegClass)
+ break;
+
+ // We are copying an unprimed accumulator to a primed accumulator.
+ // If the input to the copy is a PHI that is fed only by (i) copies in
+ // the other direction (ii) implicitly defined unprimed accumulators or
+ // (iii) other PHI nodes satisfying (i) and (ii), we can change
+ // the PHI to a PHI on primed accumulators (as long as we also change
+ // its operands). To detect and change such copies, we first get a list
+ // of all the PHI nodes starting from the root PHI node in BFS order.
+ // We then visit all these PHI nodes to check if they can be changed to
+ // primed accumulator PHI nodes and if so, we change them.
+ MachineInstr *RootPHI = MRI->getVRegDef(Src);
+ if (RootPHI->getOpcode() != PPC::PHI)
+ break;
+
+ SmallVector<MachineInstr *, 4> PHIs;
+ if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
+ break;
+
+ convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);
+
+ ToErase = &MI;
+ break;
+ }
case PPC::LI:
case PPC::LI8: {
// If we are materializing a zero, look for any use operands for which
@@ -573,7 +712,7 @@ bool PPCMIPeephole::simplifyCode(void) {
Simplified = true;
Register ConvReg1 = RoundInstr->getOperand(1).getReg();
Register FRSPDefines = RoundInstr->getOperand(0).getReg();
- MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
+ MachineInstr &Use = *(MRI->use_instr_nodbg_begin(FRSPDefines));
for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
if (Use.getOperand(i).isReg() &&
Use.getOperand(i).getReg() == FRSPDefines)
@@ -848,142 +987,9 @@ bool PPCMIPeephole::simplifyCode(void) {
case PPC::RLWINM_rec:
case PPC::RLWINM8:
case PPC::RLWINM8_rec: {
- unsigned FoldingReg = MI.getOperand(1).getReg();
- if (!Register::isVirtualRegister(FoldingReg))
- break;
-
- MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
- if (SrcMI->getOpcode() != PPC::RLWINM &&
- SrcMI->getOpcode() != PPC::RLWINM_rec &&
- SrcMI->getOpcode() != PPC::RLWINM8 &&
- SrcMI->getOpcode() != PPC::RLWINM8_rec)
- break;
- assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
- MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
- SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
- "Invalid PPC::RLWINM Instruction!");
- uint64_t SHSrc = SrcMI->getOperand(2).getImm();
- uint64_t SHMI = MI.getOperand(2).getImm();
- uint64_t MBSrc = SrcMI->getOperand(3).getImm();
- uint64_t MBMI = MI.getOperand(3).getImm();
- uint64_t MESrc = SrcMI->getOperand(4).getImm();
- uint64_t MEMI = MI.getOperand(4).getImm();
-
- assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
- "Invalid PPC::RLWINM Instruction!");
-
- // If MBMI is bigger than MEMI, we always can not get run of ones.
- // RotatedSrcMask non-wrap:
- // 0........31|32........63
- // RotatedSrcMask: B---E B---E
- // MaskMI: -----------|--E B------
- // Result: ----- --- (Bad candidate)
- //
- // RotatedSrcMask wrap:
- // 0........31|32........63
- // RotatedSrcMask: --E B----|--E B----
- // MaskMI: -----------|--E B------
- // Result: --- -----|--- ----- (Bad candidate)
- //
- // One special case is RotatedSrcMask is a full set mask.
- // RotatedSrcMask full:
- // 0........31|32........63
- // RotatedSrcMask: ------EB---|-------EB---
- // MaskMI: -----------|--E B------
- // Result: -----------|--- ------- (Good candidate)
-
- // Mark special case.
- bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
-
- // For other MBMI > MEMI cases, just return.
- if ((MBMI > MEMI) && !SrcMaskFull)
- break;
-
- // Handle MBMI <= MEMI cases.
- APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
- // In MI, we only need low 32 bits of SrcMI, just consider about low 32
- // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0,
- // while in PowerPC ISA, lowerest bit is at index 63.
- APInt MaskSrc =
- APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
-
- APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
- APInt FinalMask = RotatedSrcMask & MaskMI;
- uint32_t NewMB, NewME;
-
- // If final mask is 0, MI result should be 0 too.
- if (FinalMask.isNullValue()) {
- bool Is64Bit = (MI.getOpcode() == PPC::RLWINM8 ||
- MI.getOpcode() == PPC::RLWINM8_rec);
-
- Simplified = true;
-
- LLVM_DEBUG(dbgs() << "Replace Instr: ");
- LLVM_DEBUG(MI.dump());
-
- if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
- // Replace MI with "LI 0"
- MI.RemoveOperand(4);
- MI.RemoveOperand(3);
- MI.RemoveOperand(2);
- MI.getOperand(1).ChangeToImmediate(0);
- MI.setDesc(TII->get(Is64Bit ? PPC::LI8 : PPC::LI));
- } else {
- // Replace MI with "ANDI_rec reg, 0"
- MI.RemoveOperand(4);
- MI.RemoveOperand(3);
- MI.getOperand(2).setImm(0);
- MI.setDesc(TII->get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
- MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
- if (SrcMI->getOperand(1).isKill()) {
- MI.getOperand(1).setIsKill(true);
- SrcMI->getOperand(1).setIsKill(false);
- } else
- // About to replace MI.getOperand(1), clear its kill flag.
- MI.getOperand(1).setIsKill(false);
- }
-
- LLVM_DEBUG(dbgs() << "With: ");
- LLVM_DEBUG(MI.dump());
- } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB,
- NewME) && NewMB <= NewME)|| SrcMaskFull) {
- // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger
- // than NewME. Otherwise we get a 64 bit value after folding, but MI
- // return a 32 bit value.
-
- Simplified = true;
- LLVM_DEBUG(dbgs() << "Converting Instr: ");
- LLVM_DEBUG(MI.dump());
-
- uint16_t NewSH = (SHSrc + SHMI) % 32;
- MI.getOperand(2).setImm(NewSH);
- // If SrcMI mask is full, no need to update MBMI and MEMI.
- if (!SrcMaskFull) {
- MI.getOperand(3).setImm(NewMB);
- MI.getOperand(4).setImm(NewME);
- }
- MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
- if (SrcMI->getOperand(1).isKill()) {
- MI.getOperand(1).setIsKill(true);
- SrcMI->getOperand(1).setIsKill(false);
- } else
- // About to replace MI.getOperand(1), clear its kill flag.
- MI.getOperand(1).setIsKill(false);
-
- LLVM_DEBUG(dbgs() << "To: ");
- LLVM_DEBUG(MI.dump());
- }
- if (Simplified) {
- // If FoldingReg has no non-debug use and it has no implicit def (it
- // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
- // Otherwise keep it.
+ Simplified = TII->combineRLWINM(MI, &ToErase);
+ if (Simplified)
++NumRotatesCollapsed;
- if (MRI->use_nodbg_empty(FoldingReg) && !SrcMI->hasImplicitDef()) {
- ToErase = SrcMI;
- LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
- LLVM_DEBUG(SrcMI->dump());
- }
- }
break;
}
}
@@ -1555,6 +1561,8 @@ bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
MI.getOperand(2).setImm(NewSH);
MI.getOperand(3).setImm(NewMB);
+ MI.getOperand(1).setIsKill(SrcMI->getOperand(1).isKill());
+ SrcMI->getOperand(1).setIsKill(false);
LLVM_DEBUG(dbgs() << "To: ");
LLVM_DEBUG(MI.dump());
diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index daf88589bb52..c976a9c62d3b 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -8,6 +8,7 @@
#include "PPCMachineFunctionInfo.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
@@ -63,3 +64,36 @@ bool PPCFunctionInfo::isLiveInZExt(Register VReg) const {
return LiveIn.second.isZExt();
return false;
}
+
+void PPCFunctionInfo::appendParameterType(ParamType Type) {
+ uint32_t CopyParamType = ParameterType;
+ int Bits = 0;
+
+ // If it is fixed type, we only need to increase the FixedParamNum, for
+ // the bit encode of fixed type is bit of zero, we do not need to change the
+ // ParamType.
+ if (Type == FixedType) {
+ ++FixedParamNum;
+ return;
+ }
+
+ ++FloatingPointParamNum;
+
+ for (int I = 0;
+ I < static_cast<int>(FloatingPointParamNum + FixedParamNum - 1); ++I) {
+ if (CopyParamType & XCOFF::TracebackTable::ParmTypeIsFloatingBit) {
+ // '10'b => floating point short parameter.
+ // '11'b => floating point long parameter.
+ CopyParamType <<= 2;
+ Bits += 2;
+ } else {
+ // '0'b => fixed parameter.
+ CopyParamType <<= 1;
+ ++Bits;
+ }
+ }
+
+ assert(Type != FixedType && "FixedType should already be handled.");
+ if (Bits < 31)
+ ParameterType |= Type << (30 - Bits);
+}
diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 29ca53e273d7..4b73b36318b4 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -22,6 +22,16 @@ namespace llvm {
/// PPCFunctionInfo - This class is derived from MachineFunction private
/// PowerPC target-specific information for each MachineFunction.
class PPCFunctionInfo : public MachineFunctionInfo {
+public:
+ // The value in the ParamType are used to indicate the bitstrings used in the
+ // encoding format.
+ enum ParamType {
+ FixedType = 0x0,
+ ShortFloatPoint = 0x2,
+ LongFloatPoint = 0x3
+ };
+
+private:
virtual void anchor();
/// FramePointerSaveIndex - Frame index of where the old frame pointer is
@@ -69,9 +79,6 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// disabled.
bool DisableNonVolatileCR = false;
- /// Indicates whether VRSAVE is spilled in the current function.
- bool SpillsVRSAVE = false;
-
/// LRStoreRequired - The bool indicates whether there is some explicit use of
/// the LR/LR8 stack slot that is not obvious from scanning the code. This
/// requires that the code generator produce a store of LR to the stack on
@@ -110,6 +117,20 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// register for parameter passing.
unsigned VarArgsNumFPR = 0;
+ /// FixedParamNum - Number of fixed parameter.
+ unsigned FixedParamNum = 0;
+
+ /// FloatingParamNum - Number of floating point parameter.
+ unsigned FloatingPointParamNum = 0;
+
+ /// ParamType - Encode type for every parameter
+ /// in the order of parameters passing in.
+ /// Bitstring starts from the most significant (leftmost) bit.
+ /// '0'b => fixed parameter.
+ /// '10'b => floating point short parameter.
+ /// '11'b => floating point long parameter.
+ uint32_t ParameterType = 0;
+
/// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4.
int CRSpillFrameIndex = 0;
@@ -175,9 +196,6 @@ public:
void setDisableNonVolatileCR() { DisableNonVolatileCR = true; }
bool isNonVolatileCRDisabled() const { return DisableNonVolatileCR; }
- void setSpillsVRSAVE() { SpillsVRSAVE = true; }
- bool isVRSAVESpilled() const { return SpillsVRSAVE; }
-
void setLRStoreRequired() { LRStoreRequired = true; }
bool isLRStoreRequired() const { return LRStoreRequired; }
@@ -196,6 +214,13 @@ public:
unsigned getVarArgsNumGPR() const { return VarArgsNumGPR; }
void setVarArgsNumGPR(unsigned Num) { VarArgsNumGPR = Num; }
+ unsigned getFixedParamNum() const { return FixedParamNum; }
+
+ unsigned getFloatingPointParamNum() const { return FloatingPointParamNum; }
+
+ uint32_t getParameterType() const { return ParameterType; }
+ void appendParameterType(ParamType Type);
+
unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; }
void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }
diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
index 5649d7d13966..ce615e554d94 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
@@ -49,10 +49,103 @@ bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand,
SchedBoundary *Zone) const {
- GenericScheduler::tryCandidate(Cand, TryCand, Zone);
+ // From GenericScheduler::tryCandidate
- if (!Cand.isValid() || !Zone)
+ // Initialize the candidate if needed.
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
return;
+ }
+
+ // Bias PhysReg Defs and copies to their uses and defined respectively.
+ if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
+ biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
+ return;
+
+ // Avoid exceeding the target's limit.
+ if (DAG->isTrackingPressure() &&
+ tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
+ RegExcess, TRI, DAG->MF))
+ return;
+
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (DAG->isTrackingPressure() &&
+ tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
+ TryCand, Cand, RegCritical, TRI, DAG->MF))
+ return;
+
+ // We only compare a subset of features when comparing nodes between
+ // Top and Bottom boundary. Some properties are simply incomparable, in many
+ // other instances we should only override the other boundary if something
+ // is a clear good pick on one boundary. Skip heuristics that are more
+ // "tie-breaking" in nature.
+ bool SameBoundary = Zone != nullptr;
+ if (SameBoundary) {
+ // For loops that are acyclic path limited, aggressively schedule for
+ // latency. Within an single cycle, whenever CurrMOps > 0, allow normal
+ // heuristics to take precedence.
+ if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
+ tryLatency(TryCand, Cand, *Zone))
+ return;
+
+ // Prioritize instructions that read unbuffered resources by stall cycles.
+ if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
+ Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+ return;
+ }
+
+ // Keep clustered nodes together to encourage downstream peephole
+ // optimizations which may reduce resource requirements.
+ //
+ // This is a best effort to set things up for a post-RA pass. Optimizations
+ // like generating loads of multiple registers should ideally be done within
+ // the scheduler pass by combining the loads during DAG postprocessing.
+ const SUnit *CandNextClusterSU =
+ Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ const SUnit *TryCandNextClusterSU =
+ TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ if (tryGreater(TryCand.SU == TryCandNextClusterSU,
+ Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
+ return;
+
+ if (SameBoundary) {
+ // Weak edges are for clustering and other constraints.
+ if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
+ getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
+ return;
+ }
+
+ // Avoid increasing the max pressure of the entire region.
+ if (DAG->isTrackingPressure() &&
+ tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
+ Cand, RegMax, TRI, DAG->MF))
+ return;
+
+ if (SameBoundary) {
+ // Avoid critical resource consumption and balance the schedule.
+ TryCand.initResourceDelta(DAG, SchedModel);
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources, TryCand, Cand,
+ ResourceDemand))
+ return;
+
+ // Avoid serializing long latency dependence chains.
+ // For acyclic path limited loops, latency was already checked above.
+ if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
+ !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
+ return;
+
+ // Fall through to original instruction order.
+ if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
+ (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+ TryCand.Reason = NodeOrder;
+ }
+ }
+
+ // GenericScheduler::tryCandidate end
// Add powerpc specific heuristic only when TryCand isn't selected or
// selected as node order.
@@ -61,8 +154,10 @@ void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
// There are some benefits to schedule the ADDI before the load to hide the
// latency, as RA may create a true dependency between the load and addi.
- if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
- return;
+ if (SameBoundary) {
+ if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
+ return;
+ }
}
bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand,
@@ -79,11 +174,44 @@ bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand,
void PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand) {
- PostGenericScheduler::tryCandidate(Cand, TryCand);
+ // From PostGenericScheduler::tryCandidate
+
+ // Initialize the candidate if needed.
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return;
+ }
+
+ // Prioritize instructions that read unbuffered resources by stall cycles.
+ if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
+ Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+ return;
- if (!Cand.isValid())
+ // Keep clustered nodes together.
+ if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
+ Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster))
return;
+ // Avoid critical resource consumption and balance the schedule.
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources, TryCand, Cand,
+ ResourceDemand))
+ return;
+
+ // Avoid serializing long latency dependence chains.
+ if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
+ return;
+ }
+
+ // Fall through to original instruction order.
+ if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
+ TryCand.Reason = NodeOrder;
+
+ // PostGenericScheduler::tryCandidate end
+
// Add powerpc post ra specific heuristic only when TryCand isn't selected or
// selected as node order.
if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
index 815dfd1402f4..d12c6d9cd406 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
@@ -51,8 +51,8 @@ public:
Kd(Kind), Supported(HasFeature), DepOpIdx(Index), OpSet1(First),
OpSet2(Second) {}
- bool hasOp1(unsigned Opc) const { return OpSet1.count(Opc) != 0; }
- bool hasOp2(unsigned Opc) const { return OpSet2.count(Opc) != 0; }
+ bool hasOp1(unsigned Opc) const { return OpSet1.contains(Opc); }
+ bool hasOp2(unsigned Opc) const { return OpSet2.contains(Opc); }
bool isSupported() const { return Supported; }
Optional<unsigned> depOpIdx() const {
if (DepOpIdx < 0)
diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 4ea714ff15f7..a8853609a7c8 100644
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -21,8 +21,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -39,10 +39,54 @@ STATISTIC(NumFrameOffFoldInPreEmit,
"Number of folding frame offset by using r+r in pre-emit peephole");
static cl::opt<bool>
+EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
+ cl::desc("enable PC Relative linker optimization"));
+
+static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
cl::desc("Run pre-emit peephole optimizations."));
namespace {
+
+static bool hasPCRelativeForm(MachineInstr &Use) {
+ switch (Use.getOpcode()) {
+ default:
+ return false;
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ case PPC::LD:
+ case PPC::STD:
+ case PPC::LWA:
+ case PPC::LXSD:
+ case PPC::LXSSP:
+ case PPC::LXV:
+ case PPC::STXSD:
+ case PPC::STXSSP:
+ case PPC::STXV:
+ case PPC::LFD:
+ case PPC::LFS:
+ case PPC::STFD:
+ case PPC::STFS:
+ case PPC::DFLOADf32:
+ case PPC::DFLOADf64:
+ case PPC::DFSTOREf32:
+ case PPC::DFSTOREf64:
+ return true;
+ }
+}
+
class PPCPreEmitPeephole : public MachineFunctionPass {
public:
static char ID;
@@ -77,7 +121,7 @@ namespace {
for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
// Skip load immediate that is marked to be erased later because it
// cannot be used to replace any other instructions.
- if (InstrsToErase.find(&*BBI) != InstrsToErase.end())
+ if (InstrsToErase.contains(&*BBI))
continue;
// Skip non-load immediate.
unsigned Opc = BBI->getOpcode();
@@ -172,6 +216,196 @@ namespace {
return !InstrsToErase.empty();
}
+ // Check if this instruction is a PLDpc that is part of a GOT indirect
+ // access.
+ bool isGOTPLDpc(MachineInstr &Instr) {
+ if (Instr.getOpcode() != PPC::PLDpc)
+ return false;
+
+ // The result must be a register.
+ const MachineOperand &LoadedAddressReg = Instr.getOperand(0);
+ if (!LoadedAddressReg.isReg())
+ return false;
+
+ // Make sure that this is a global symbol.
+ const MachineOperand &SymbolOp = Instr.getOperand(1);
+ if (!SymbolOp.isGlobal())
+ return false;
+
+ // Finally return true only if the GOT flag is present.
+ return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG);
+ }
+
+ bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) {
+ MachineFunction *MF = MBB.getParent();
+ // If the linker opt is disabled then just return.
+ if (!EnablePCRelLinkerOpt)
+ return false;
+
+ // Add this linker opt only if we are using PC Relative memops.
+ if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls())
+ return false;
+
+ // Struct to keep track of one def/use pair for a GOT indirect access.
+ struct GOTDefUsePair {
+ MachineBasicBlock::iterator DefInst;
+ MachineBasicBlock::iterator UseInst;
+ Register DefReg;
+ Register UseReg;
+ bool StillValid;
+ };
+ // Vector of def/ues pairs in this basic block.
+ SmallVector<GOTDefUsePair, 4> CandPairs;
+ SmallVector<GOTDefUsePair, 4> ValidPairs;
+ bool MadeChange = false;
+
+ // Run through all of the instructions in the basic block and try to
+ // collect potential pairs of GOT indirect access instructions.
+ for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
+ // Look for the initial GOT indirect load.
+ if (isGOTPLDpc(*BBI)) {
+ GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(),
+ BBI->getOperand(0).getReg(),
+ PPC::NoRegister, true};
+ CandPairs.push_back(CurrentPair);
+ continue;
+ }
+
+ // We haven't encountered any new PLD instructions, nothing to check.
+ if (CandPairs.empty())
+ continue;
+
+ // Run through the candidate pairs and see if any of the registers
+ // defined in the PLD instructions are used by this instruction.
+ // Note: the size of CandPairs can change in the loop.
+ for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) {
+ GOTDefUsePair &Pair = CandPairs[Idx];
+ // The instruction does not use or modify this PLD's def reg,
+ // ignore it.
+ if (!BBI->readsRegister(Pair.DefReg, TRI) &&
+ !BBI->modifiesRegister(Pair.DefReg, TRI))
+ continue;
+
+ // The use needs to be used in the address compuation and not
+ // as the register being stored for a store.
+ const MachineOperand *UseOp =
+ hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr;
+
+ // Check for a valid use.
+ if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg &&
+ UseOp->isUse() && UseOp->isKill()) {
+ Pair.UseInst = BBI;
+ Pair.UseReg = BBI->getOperand(0).getReg();
+ ValidPairs.push_back(Pair);
+ }
+ CandPairs.erase(CandPairs.begin() + Idx);
+ }
+ }
+
+ // Go through all of the pairs and check for any more valid uses.
+ for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) {
+ // We shouldn't be here if we don't have a valid pair.
+ assert(Pair->UseInst.isValid() && Pair->StillValid &&
+ "Kept an invalid def/use pair for GOT PCRel opt");
+ // We have found a potential pair. Search through the instructions
+ // between the def and the use to see if it is valid to mark this as a
+ // linker opt.
+ MachineBasicBlock::iterator BBI = Pair->DefInst;
+ ++BBI;
+ for (; BBI != Pair->UseInst; ++BBI) {
+ if (BBI->readsRegister(Pair->UseReg, TRI) ||
+ BBI->modifiesRegister(Pair->UseReg, TRI)) {
+ Pair->StillValid = false;
+ break;
+ }
+ }
+
+ if (!Pair->StillValid)
+ continue;
+
+ // The load/store instruction that uses the address from the PLD will
+ // either use a register (for a store) or define a register (for the
+ // load). That register will be added as an implicit def to the PLD
+ // and as an implicit use on the second memory op. This is a precaution
+ // to prevent future passes from using that register between the two
+ // instructions.
+ MachineOperand ImplDef =
+ MachineOperand::CreateReg(Pair->UseReg, true, true);
+ MachineOperand ImplUse =
+ MachineOperand::CreateReg(Pair->UseReg, false, true);
+ Pair->DefInst->addOperand(ImplDef);
+ Pair->UseInst->addOperand(ImplUse);
+
+ // Create the symbol.
+ MCContext &Context = MF->getContext();
+ MCSymbol *Symbol = Context.createNamedTempSymbol("pcrel");
+ MachineOperand PCRelLabel =
+ MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG);
+ Pair->DefInst->addOperand(*MF, PCRelLabel);
+ Pair->UseInst->addOperand(*MF, PCRelLabel);
+ MadeChange |= true;
+ }
+ return MadeChange;
+ }
+
+ // This function removes redundant pairs of accumulator prime/unprime
+ // instructions. In some situations, it's possible the compiler inserts an
+ // accumulator prime instruction followed by an unprime instruction (e.g.
+ // when we store an accumulator after restoring it from a spill). If the
+ // accumulator is not used between the two, they can be removed. This
+ // function removes these redundant pairs from basic blocks.
+ // The algorithm is quite straightforward - every time we encounter a prime
+ // instruction, the primed register is added to a candidate set. Any use
+ // other than a prime removes the candidate from the set and any de-prime
+ // of a current candidate marks both the prime and de-prime for removal.
+ // This way we ensure we only remove prime/de-prime *pairs* with no
+ // intervening uses.
+ bool removeAccPrimeUnprime(MachineBasicBlock &MBB) {
+ DenseSet<MachineInstr *> InstrsToErase;
+ // Initially, none of the acc registers are candidates.
+ SmallVector<MachineInstr *, 8> Candidates(
+ PPC::UACCRCRegClass.getNumRegs(), nullptr);
+
+ for (MachineInstr &BBI : MBB.instrs()) {
+ unsigned Opc = BBI.getOpcode();
+ // If we are visiting a xxmtacc instruction, we add it and its operand
+ // register to the candidate set.
+ if (Opc == PPC::XXMTACC) {
+ Register Acc = BBI.getOperand(0).getReg();
+ assert(PPC::ACCRCRegClass.contains(Acc) &&
+ "Unexpected register for XXMTACC");
+ Candidates[Acc - PPC::ACC0] = &BBI;
+ }
+ // If we are visiting a xxmfacc instruction and its operand register is
+ // in the candidate set, we mark the two instructions for removal.
+ else if (Opc == PPC::XXMFACC) {
+ Register Acc = BBI.getOperand(0).getReg();
+ assert(PPC::ACCRCRegClass.contains(Acc) &&
+ "Unexpected register for XXMFACC");
+ if (!Candidates[Acc - PPC::ACC0])
+ continue;
+ InstrsToErase.insert(&BBI);
+ InstrsToErase.insert(Candidates[Acc - PPC::ACC0]);
+ }
+ // If we are visiting an instruction using an accumulator register
+ // as operand, we remove it from the candidate set.
+ else {
+ for (MachineOperand &Operand : BBI.operands()) {
+ if (!Operand.isReg())
+ continue;
+ Register Reg = Operand.getReg();
+ if (PPC::ACCRCRegClass.contains(Reg))
+ Candidates[Reg - PPC::ACC0] = nullptr;
+ }
+ }
+ }
+
+ for (MachineInstr *MI : InstrsToErase)
+ MI->eraseFromParent();
+ NumRemovedInPreEmit += InstrsToErase.size();
+ return !InstrsToErase.empty();
+ }
+
bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
// Remove UNENCODED_NOP even when this pass is disabled.
@@ -192,6 +426,8 @@ namespace {
SmallVector<MachineInstr *, 4> InstrsToErase;
for (MachineBasicBlock &MBB : MF) {
Changed |= removeRedundantLIs(MBB, TRI);
+ Changed |= addLinkerOpt(MBB, TRI);
+ Changed |= removeAccPrimeUnprime(MBB);
for (MachineInstr &MI : MBB) {
unsigned Opc = MI.getOpcode();
if (Opc == PPC::UNENCODED_NOP) {
diff --git a/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
deleted file mode 100644
index 6e9042643820..000000000000
--- a/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// The QPX vector registers overlay the scalar floating-point registers, and
-// any scalar floating-point loads splat their value across all vector lanes.
-// Thus, if we have a scalar load followed by a splat, we can remove the splat
-// (i.e. replace the load with a load-and-splat pseudo instruction).
-//
-// This pass must run after anything that might do store-to-load forwarding.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PPC.h"
-#include "PPCInstrBuilder.h"
-#include "PPCInstrInfo.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "ppc-qpx-load-splat"
-
-STATISTIC(NumSimplified, "Number of QPX load splats simplified");
-
-namespace {
- struct PPCQPXLoadSplat : public MachineFunctionPass {
- static char ID;
- PPCQPXLoadSplat() : MachineFunctionPass(ID) {
- initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &Fn) override;
-
- StringRef getPassName() const override {
- return "PowerPC QPX Load Splat Simplification";
- }
- };
- char PPCQPXLoadSplat::ID = 0;
-}
-
-INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat",
- "PowerPC QPX Load Splat Simplification",
- false, false)
-
-FunctionPass *llvm::createPPCQPXLoadSplatPass() {
- return new PPCQPXLoadSplat();
-}
-
-bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()))
- return false;
-
- bool MadeChange = false;
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-
- for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) {
- MachineBasicBlock *MBB = &*MFI;
- SmallVector<MachineInstr *, 4> Splats;
-
- for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) {
- MachineInstr *MI = &*MBBI;
-
- if (MI->hasUnmodeledSideEffects() || MI->isCall()) {
- Splats.clear();
- continue;
- }
-
- // We're looking for a sequence like this:
- // %f0 = LFD 0, killed %x3, implicit-def %qf0; mem:LD8[%a](tbaa=!2)
- // %qf1 = QVESPLATI killed %qf0, 0, implicit %rm
-
- for (auto SI = Splats.begin(); SI != Splats.end();) {
- MachineInstr *SMI = *SI;
- Register SplatReg = SMI->getOperand(0).getReg();
- Register SrcReg = SMI->getOperand(1).getReg();
-
- if (MI->modifiesRegister(SrcReg, TRI)) {
- switch (MI->getOpcode()) {
- default:
- SI = Splats.erase(SI);
- continue;
- case PPC::LFS:
- case PPC::LFD:
- case PPC::LFSU:
- case PPC::LFDU:
- case PPC::LFSUX:
- case PPC::LFDUX:
- case PPC::LFSX:
- case PPC::LFDX:
- case PPC::LFIWAX:
- case PPC::LFIWZX:
- if (SplatReg != SrcReg) {
- // We need to change the load to define the scalar subregister of
- // the QPX splat source register.
- unsigned SubRegIndex =
- TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
- Register SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
-
- // Substitute both the explicit defined register, and also the
- // implicit def of the containing QPX register.
- MI->getOperand(0).setReg(SplatSubReg);
- MI->substituteRegister(SrcReg, SplatReg, 0, *TRI);
- }
-
- SI = Splats.erase(SI);
-
- // If SMI is directly after MI, then MBBI's base iterator is
- // pointing at SMI. Adjust MBBI around the call to erase SMI to
- // avoid invalidating MBBI.
- ++MBBI;
- SMI->eraseFromParent();
- --MBBI;
-
- ++NumSimplified;
- MadeChange = true;
- continue;
- }
- }
-
- // If this instruction defines the splat register, then we cannot move
- // the previous definition above it. If it reads from the splat
- // register, then it must already be alive from some previous
- // definition, and if the splat register is different from the source
- // register, then this definition must not be the load for which we're
- // searching.
- if (MI->modifiesRegister(SplatReg, TRI) ||
- (SrcReg != SplatReg &&
- MI->readsRegister(SplatReg, TRI))) {
- SI = Splats.erase(SI);
- continue;
- }
-
- ++SI;
- }
-
- if (MI->getOpcode() != PPC::QVESPLATI &&
- MI->getOpcode() != PPC::QVESPLATIs &&
- MI->getOpcode() != PPC::QVESPLATIb)
- continue;
- if (MI->getOperand(2).getImm() != 0)
- continue;
-
- // If there are other uses of the scalar value after this, replacing
- // those uses might be non-trivial.
- if (!MI->getOperand(1).isKill())
- continue;
-
- Splats.push_back(MI);
- }
- }
-
- return MadeChange;
-}
diff --git a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
index 90cc81beb89d..5cee00c61fc1 100644
--- a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
+++ b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
@@ -206,9 +206,9 @@ static bool splitMBB(BlockSplitInfo &BSI) {
NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
NewMBB->transferSuccessors(ThisMBB);
if (!ProbOrigTarget.isUnknown()) {
- auto MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigTarget);
+ auto MBBI = find(NewMBB->successors(), OrigTarget);
NewMBB->setSuccProbability(MBBI, ProbOrigTarget);
- MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigFallThrough);
+ MBBI = find(NewMBB->successors(), OrigFallThrough);
NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough);
}
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 35f5e1fbebcd..178a13443e2a 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -75,6 +75,21 @@ MaxCRBitSpillDist("ppc-max-crbit-spill-dist",
"spill on ppc"),
cl::Hidden, cl::init(100));
+// Copies/moves of physical accumulators are expensive operations
+// that should be avoided whenever possible. MMA instructions are
+// meant to be used in performance-sensitive computational kernels.
+// This option is provided, at least for the time being, to give the
+// user a tool to detect this expensive operation and either rework
+// their code or report a compiler bug if that turns out to be the
+// cause.
+#ifndef NDEBUG
+static cl::opt<bool>
+ReportAccMoves("ppc-report-acc-moves",
+ cl::desc("Emit information about accumulator register spills "
+ "and copies"),
+ cl::Hidden, cl::init(false));
+#endif
+
static unsigned offsetMinAlignForOpcode(unsigned OpC);
PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
@@ -141,6 +156,10 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
const MCPhysReg*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>();
+ if (Subtarget.isAIXABI() &&
+ (Subtarget.hasAltivec() && !TM.getAIXExtendedAltivecABI()))
+ report_fatal_error("the default AIX Altivec ABI is not yet "
+ "supported.");
if (MF->getFunction().getCallingConv() == CallingConv::AnyReg) {
if (!TM.isPPC64() && Subtarget.isAIXABI())
report_fatal_error("AnyReg unimplemented on 32-bit AIX.");
@@ -187,8 +206,11 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return SaveR2 ? CSR_PPC64_R2_SaveList : CSR_PPC64_SaveList;
}
// 32-bit targets.
- if (Subtarget.isAIXABI())
+ if (Subtarget.isAIXABI()) {
+ if (Subtarget.hasAltivec())
+ return CSR_AIX32_Altivec_SaveList;
return CSR_AIX32_SaveList;
+ }
if (Subtarget.hasAltivec())
return CSR_SVR432_Altivec_SaveList;
else if (Subtarget.hasSPE())
@@ -209,8 +231,10 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
}
if (Subtarget.isAIXABI()) {
- assert(!Subtarget.hasAltivec() && "Altivec is not implemented on AIX yet.");
- return TM.isPPC64() ? CSR_PPC64_RegMask : CSR_AIX32_RegMask;
+ return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask
+ : CSR_PPC64_RegMask)
+ : (Subtarget.hasAltivec() ? CSR_AIX32_Altivec_RegMask
+ : CSR_AIX32_RegMask);
}
if (CC == CallingConv::Cold) {
@@ -404,9 +428,6 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
case PPC::F8RCRegClassID:
case PPC::F4RCRegClassID:
- case PPC::QFRCRegClassID:
- case PPC::QSRCRegClassID:
- case PPC::QBRCRegClassID:
case PPC::VRRCRegClassID:
case PPC::VFRCRegClassID:
case PPC::VSLRCRegClassID:
@@ -624,21 +645,30 @@ void PPCRegisterInfo::lowerPrepareProbedAlloca(
bool LP64 = TM.isPPC64();
DebugLoc dl = MI.getDebugLoc();
Register FramePointer = MI.getOperand(0).getReg();
- Register FinalStackPtr = MI.getOperand(1).getReg();
+ const Register ActualNegSizeReg = MI.getOperand(1).getReg();
bool KillNegSizeReg = MI.getOperand(2).isKill();
Register NegSizeReg = MI.getOperand(2).getReg();
- prepareDynamicAlloca(II, NegSizeReg, KillNegSizeReg, FramePointer);
- if (LP64) {
- BuildMI(MBB, II, dl, TII.get(PPC::ADD8), FinalStackPtr)
- .addReg(PPC::X1)
- .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
-
- } else {
- BuildMI(MBB, II, dl, TII.get(PPC::ADD4), FinalStackPtr)
- .addReg(PPC::R1)
- .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
+ const MCInstrDesc &CopyInst = TII.get(LP64 ? PPC::OR8 : PPC::OR);
+ // RegAllocator might allocate FramePointer and NegSizeReg in the same phyreg.
+ if (FramePointer == NegSizeReg) {
+ assert(KillNegSizeReg && "FramePointer is a def and NegSizeReg is an use, "
+ "NegSizeReg should be killed");
+ // FramePointer is clobbered earlier than the use of NegSizeReg in
+ // prepareDynamicAlloca, save NegSizeReg in ActualNegSizeReg to avoid
+ // misuse.
+ BuildMI(MBB, II, dl, CopyInst, ActualNegSizeReg)
+ .addReg(NegSizeReg)
+ .addReg(NegSizeReg);
+ NegSizeReg = ActualNegSizeReg;
+ KillNegSizeReg = false;
}
-
+ prepareDynamicAlloca(II, NegSizeReg, KillNegSizeReg, FramePointer);
+ // NegSizeReg might be updated in prepareDynamicAlloca if MaxAlign >
+ // TargetAlign.
+ if (NegSizeReg != ActualNegSizeReg)
+ BuildMI(MBB, II, dl, CopyInst, ActualNegSizeReg)
+ .addReg(NegSizeReg)
+ .addReg(NegSizeReg);
MBB.erase(II);
}
@@ -821,6 +851,16 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
SpillsKnownBit = true;
break;
default:
+ // On Power10, we can use SETNBC to spill all CR bits. SETNBC will set all
+ // bits (specifically, it produces a -1 if the CR bit is set). Ultimately,
+ // the bit that is of importance to us is bit 32 (bit 0 of a 32-bit
+ // register), and SETNBC will set this.
+ if (Subtarget.isISA3_1()) {
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETNBC8 : PPC::SETNBC), Reg)
+ .addReg(SrcReg, RegState::Undef);
+ break;
+ }
+
// On Power9, we can use SETB to extract the LT bit. This only works for
// the LT bit since SETB produces -1/1/0 for LT/GT/<neither>. So the value
// of the bit we care about (32-bit sign bit) will be set to the value of
@@ -920,54 +960,104 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
MBB.erase(II);
}
-void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
- unsigned FrameIndex) const {
- // Get the instruction.
- MachineInstr &MI = *II; // ; SPILL_VRSAVE <SrcReg>, <offset>
- // Get the instruction's basic block.
+void PPCRegisterInfo::emitAccCopyInfo(MachineBasicBlock &MBB,
+ MCRegister DestReg, MCRegister SrcReg) {
+#ifdef NDEBUG
+ return;
+#else
+ if (ReportAccMoves) {
+ std::string Dest = PPC::ACCRCRegClass.contains(DestReg) ? "acc" : "uacc";
+ std::string Src = PPC::ACCRCRegClass.contains(SrcReg) ? "acc" : "uacc";
+ dbgs() << "Emitting copy from " << Src << " to " << Dest << ":\n";
+ MBB.dump();
+ }
+#endif
+}
+
+static void emitAccSpillRestoreInfo(MachineBasicBlock &MBB, bool IsPrimed,
+ bool IsRestore) {
+#ifdef NDEBUG
+ return;
+#else
+ if (ReportAccMoves) {
+ dbgs() << "Emitting " << (IsPrimed ? "acc" : "uacc") << " register "
+ << (IsRestore ? "restore" : "spill") << ":\n";
+ MBB.dump();
+ }
+#endif
+}
+
+/// lowerACCSpilling - Generate the code for spilling the accumulator register.
+/// Similarly to other spills/reloads that use pseudo-ops, we do not actually
+/// eliminate the FrameIndex here nor compute the stack offset. We simply
+/// create a real instruction with an FI and rely on eliminateFrameIndex to
+/// handle the FI elimination.
+void PPCRegisterInfo::lowerACCSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ MachineInstr &MI = *II; // SPILL_ACC <SrcReg>, <offset>
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
- DebugLoc dl = MI.getDebugLoc();
-
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- Register Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ DebugLoc DL = MI.getDebugLoc();
Register SrcReg = MI.getOperand(0).getReg();
-
- BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg)
- .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
-
- addFrameReference(
- BuildMI(MBB, II, dl, TII.get(PPC::STW)).addReg(Reg, RegState::Kill),
- FrameIndex);
+ bool IsKilled = MI.getOperand(0).isKill();
+
+ bool IsPrimed = PPC::ACCRCRegClass.contains(SrcReg);
+ Register Reg =
+ PPC::VSRp0 + (SrcReg - (IsPrimed ? PPC::ACC0 : PPC::UACC0)) * 2;
+ bool IsLittleEndian = Subtarget.isLittleEndian();
+
+ emitAccSpillRestoreInfo(MBB, IsPrimed, false);
+
+ // De-prime the register being spilled, create two stores for the pair
+ // subregisters accounting for endianness and then re-prime the register if
+ // it isn't killed. This uses the Offset parameter to addFrameReference() to
+ // adjust the offset of the store that is within the 64-byte stack slot.
+ if (IsPrimed)
+ BuildMI(MBB, II, DL, TII.get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+ .addReg(Reg, getKillRegState(IsKilled)),
+ FrameIndex, IsLittleEndian ? 32 : 0);
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+ .addReg(Reg + 1, getKillRegState(IsKilled)),
+ FrameIndex, IsLittleEndian ? 0 : 32);
+ if (IsPrimed && !IsKilled)
+ BuildMI(MBB, II, DL, TII.get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
// Discard the pseudo instruction.
MBB.erase(II);
}
-void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
- unsigned FrameIndex) const {
- // Get the instruction.
- MachineInstr &MI = *II; // ; <DestReg> = RESTORE_VRSAVE <offset>
- // Get the instruction's basic block.
+/// lowerACCRestore - Generate the code to restore the accumulator register.
+void PPCRegisterInfo::lowerACCRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ MachineInstr &MI = *II; // <DestReg> = RESTORE_ACC <offset>
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
- DebugLoc dl = MI.getDebugLoc();
+ DebugLoc DL = MI.getDebugLoc();
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- Register Reg = MF.getRegInfo().createVirtualRegister(GPRC);
Register DestReg = MI.getOperand(0).getReg();
assert(MI.definesRegister(DestReg) &&
- "RESTORE_VRSAVE does not define its destination");
+ "RESTORE_ACC does not define its destination");
- addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ),
- Reg), FrameIndex);
+ bool IsPrimed = PPC::ACCRCRegClass.contains(DestReg);
+ Register Reg =
+ PPC::VSRp0 + (DestReg - (IsPrimed ? PPC::ACC0 : PPC::UACC0)) * 2;
+ bool IsLittleEndian = Subtarget.isLittleEndian();
- BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg)
- .addReg(Reg, RegState::Kill);
+ emitAccSpillRestoreInfo(MBB, IsPrimed, true);
+
+ // Create two loads for the pair subregisters accounting for endianness and
+ // then prime the accumulator register being restored.
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), Reg),
+ FrameIndex, IsLittleEndian ? 32 : 0);
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), Reg + 1),
+ FrameIndex, IsLittleEndian ? 0 : 32);
+ if (IsPrimed)
+ BuildMI(MBB, II, DL, TII.get(PPC::XXMTACC), DestReg).addReg(DestReg);
// Discard the pseudo instruction.
MBB.erase(II);
@@ -1084,7 +1174,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (FPSI && FrameIndex == FPSI &&
(OpC == PPC::PREPARE_PROBED_ALLOCA_64 ||
- OpC == PPC::PREPARE_PROBED_ALLOCA_32)) {
+ OpC == PPC::PREPARE_PROBED_ALLOCA_32 ||
+ OpC == PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64 ||
+ OpC == PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32)) {
lowerPrepareProbedAlloca(II);
return;
}
@@ -1102,11 +1194,11 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
} else if (OpC == PPC::RESTORE_CRBIT) {
lowerCRBitRestore(II, FrameIndex);
return;
- } else if (OpC == PPC::SPILL_VRSAVE) {
- lowerVRSAVESpilling(II, FrameIndex);
+ } else if (OpC == PPC::SPILL_ACC || OpC == PPC::SPILL_UACC) {
+ lowerACCSpilling(II, FrameIndex);
return;
- } else if (OpC == PPC::RESTORE_VRSAVE) {
- lowerVRSAVERestore(II, FrameIndex);
+ } else if (OpC == PPC::RESTORE_ACC || OpC == PPC::RESTORE_UACC) {
+ lowerACCRestore(II, FrameIndex);
return;
}
@@ -1283,10 +1375,9 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
/// Insert defining instruction(s) for BaseReg to
/// be a pointer to FrameIdx at the beginning of the basic block.
-void PPCRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
- Register BaseReg,
- int FrameIdx,
- int64_t Offset) const {
+Register PPCRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ int FrameIdx,
+ int64_t Offset) const {
unsigned ADDriOpc = TM.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
MachineBasicBlock::iterator Ins = MBB->begin();
@@ -1299,10 +1390,14 @@ void PPCRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const TargetRegisterClass *RC = getPointerRegClass(MF);
+ Register BaseReg = MRI.createVirtualRegister(RC);
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
BuildMI(*MBB, Ins, DL, MCID, BaseReg)
.addFrameIndex(FrameIdx).addImm(Offset);
+
+ return BaseReg;
}
void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 61acd955e1cb..93f330ab56b6 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -119,10 +119,14 @@ public:
unsigned FrameIndex) const;
void lowerCRBitRestore(MachineBasicBlock::iterator II,
unsigned FrameIndex) const;
- void lowerVRSAVESpilling(MachineBasicBlock::iterator II,
- unsigned FrameIndex) const;
- void lowerVRSAVERestore(MachineBasicBlock::iterator II,
- unsigned FrameIndex) const;
+
+ void lowerACCSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerACCRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+
+ static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg,
+ MCRegister SrcReg);
bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg,
int &FrameIdx) const override;
@@ -132,9 +136,8 @@ public:
// Support for virtual base registers.
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
- void materializeFrameBaseRegister(MachineBasicBlock *MBB, Register BaseReg,
- int FrameIdx,
- int64_t Offset) const override;
+ Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
+ int64_t Offset) const override;
void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const override;
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
@@ -151,12 +154,18 @@ public:
/// register name so that only the number is left. Used by for linux asm.
static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) {
+ case 'a':
+ if (RegName[1] == 'c' && RegName[2] == 'c')
+ return RegName + 3;
+ break;
case 'r':
case 'f':
- case 'q': // for QPX
case 'v':
- if (RegName[1] == 's')
+ if (RegName[1] == 's') {
+ if (RegName[2] == 'p')
+ return RegName + 3;
return RegName + 2;
+ }
return RegName + 1;
case 'c': if (RegName[1] == 'r') return RegName + 2;
}
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index b45757c1acc5..e03617aa75ff 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -16,6 +16,10 @@ def sub_eq : SubRegIndex<1, 2>;
def sub_un : SubRegIndex<1, 3>;
def sub_32 : SubRegIndex<32>;
def sub_64 : SubRegIndex<64>;
+def sub_vsx0 : SubRegIndex<128>;
+def sub_vsx1 : SubRegIndex<128, 128>;
+def sub_pair0 : SubRegIndex<256>;
+def sub_pair1 : SubRegIndex<256, 256>;
}
@@ -54,13 +58,6 @@ class FPR<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
-// QFPR - One of the 32 256-bit floating-point vector registers (used for QPX)
-class QFPR<FPR SubReg, string n> : PPCReg<n> {
- let HWEncoding = SubReg.HWEncoding;
- let SubRegs = [SubReg];
- let SubRegIndices = [sub_64];
-}
-
// VF - One of the 32 64-bit floating-point subregisters of the vector
// registers (used by VSX).
class VF<bits<5> num, string n> : PPCReg<n> {
@@ -101,6 +98,27 @@ class CRBIT<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
+// ACC - One of the 8 512-bit VSX accumulators.
+class ACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
+ let HWEncoding{2-0} = num;
+ let SubRegs = subregs;
+}
+
+// UACC - One of the 8 512-bit VSX accumulators prior to being primed.
+// Without using this register class, the register allocator has no way to
+// differentiate a primed accumulator from an unprimed accumulator.
+// This may result in invalid copies between primed and unprimed accumulators.
+class UACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
+ let HWEncoding{2-0} = num;
+ let SubRegs = subregs;
+}
+
+// VSR Pairs - One of the 32 paired even-odd consecutive VSRs.
+class VSRPair<bits<5> num, string n, list<Register> subregs> : PPCReg<n> {
+ let HWEncoding{4-0} = num;
+ let SubRegs = subregs;
+}
+
// General-purpose registers
foreach Index = 0-31 in {
def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>;
@@ -132,12 +150,6 @@ foreach Index = 0-31 in {
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
}
-// QPX Floating-point registers
-foreach Index = 0-31 in {
- def QF#Index : QFPR<!cast<FPR>("F"#Index), "q"#Index>,
- DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
-}
-
// Vector registers
foreach Index = 0-31 in {
def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
@@ -156,6 +168,23 @@ foreach Index = 32-63 in {
def VSX#Index : VSXReg<Index, "vs"#Index>;
}
+let SubRegIndices = [sub_vsx0, sub_vsx1] in {
+ // VSR pairs 0 - 15 (corresponding to VSRs 0 - 30 paired with 1 - 31).
+ foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in {
+ def VSRp#!srl(Index, 1) : VSRPair<!srl(Index, 1), "vsp"#Index,
+ [!cast<VSRL>("VSL"#Index), !cast<VSRL>("VSL"#!add(Index, 1))]>,
+ DwarfRegNum<[0, 0]>;
+ }
+
+ // VSR pairs 16 - 31 (corresponding to VSRs 32 - 62 paired with 33 - 63).
+ foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in {
+ def VSRp#!add(!srl(Index, 1), 16) :
+ VSRPair<!add(!srl(Index, 1), 16), "vsp"#!add(Index, 32),
+ [!cast<VR>("V"#Index), !cast<VR>("V"#!add(Index, 1))]>,
+ DwarfRegNum<[0, 0]>;
+ }
+}
+
// The representation of r0 when treated as the constant 0.
def ZERO : GPR<0, "0">, DwarfRegAlias<R0>;
def ZERO8 : GP8<ZERO, "0">, DwarfRegAlias<X0>;
@@ -343,16 +372,6 @@ def SPILLTOVSRRC : RegisterClass<"PPC", [i64, f64], 64, (add G8RC, (sub VSFRC,
// Register class for single precision scalars in VSX registers
def VSSRC : RegisterClass<"PPC", [f32], 32, (add VSFRC)>;
-// For QPX
-def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13),
- (sequence "QF%u", 31, 14))>;
-def QSRC : RegisterClass<"PPC", [v4f32], 128, (add QFRC)>;
-def QBRC : RegisterClass<"PPC", [v4i1], 256, (add QFRC)> {
- // These are actually stored as floating-point values where a positive
- // number is true and anything else (including NaN) is false.
- let Size = 256;
-}
-
def CRBITRC : RegisterClass<"PPC", [i1], 32,
(add CR2LT, CR2GT, CR2EQ, CR2UN,
CR3LT, CR3GT, CR3EQ, CR3UN,
@@ -395,3 +414,44 @@ def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> {
let CopyCost = -1;
}
+let SubRegIndices = [sub_pair0, sub_pair1] in {
+ def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[0, 0]>;
+ def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[0, 0]>;
+ def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[0, 0]>;
+ def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[0, 0]>;
+ def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[0, 0]>;
+ def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[0, 0]>;
+ def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[0, 0]>;
+ def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[0, 0]>;
+}
+def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3,
+ ACC4, ACC5, ACC6, ACC7)> {
+ let Size = 512;
+}
+
+let SubRegIndices = [sub_pair0, sub_pair1] in {
+ def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[0, 0]>;
+ def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[0, 0]>;
+ def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[0, 0]>;
+ def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[0, 0]>;
+ def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[0, 0]>;
+ def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[0, 0]>;
+ def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[0, 0]>;
+ def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[0, 0]>;
+}
+def UACCRC : RegisterClass<"PPC", [v512i1], 128,
+ (add UACC0, UACC1, UACC2, UACC3,
+ UACC4, UACC5, UACC6, UACC7)> {
+ let Size = 512;
+}
+
+// Allocate in the same order as the underlying VSX registers.
+def VSRpRC :
+ RegisterClass<"PPC", [v256i1], 128,
+ (add (sequence "VSRp%u", 0, 6),
+ (sequence "VSRp%u", 15, 7), VSRp17, VSRp18,
+ VSRp16, VSRp19, VSRp20, VSRp21, VSRp22, VSRp23,
+ VSRp24, VSRp25, VSRp31, VSRp30, VSRp29, VSRp28,
+ VSRp27, VSRp26)> {
+ let Size = 256;
+}
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index 0a1ae7e55b3c..571cc219ff2b 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -40,12 +40,11 @@ def P9Model : SchedMachineModel {
let CompleteModel = 1;
- // Do not support QPX (Quad Processing eXtension), SPE (Signal Processing
- // Engine), prefixed instructions on Power 9, PC relative mem ops, or
- // instructions introduced in ISA 3.1.
- let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops,
- IsISA3_1];
-
+ // Do not support SPE (Signal Processing Engine), prefixed instructions on
+ // Power 9, paired vector mem ops, MMA, PC relative mem ops, or instructions
+ // introduced in ISA 3.1.
+ let UnsupportedFeatures = [HasSPE, PrefixInstrs, PairedVectorMemops, MMA,
+ PCRelativeMemops, IsISA3_1];
}
let SchedModel = P9Model in {
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 3836cc960394..d31195f67ef1 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -11,9 +11,13 @@
//===----------------------------------------------------------------------===//
#include "PPCSubtarget.h"
+#include "GISel/PPCCallLowering.h"
+#include "GISel/PPCLegalizerInfo.h"
+#include "GISel/PPCRegisterBankInfo.h"
#include "PPC.h"
#include "PPCRegisterInfo.h"
#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/Attributes.h"
@@ -35,10 +39,6 @@ using namespace llvm;
static cl::opt<bool> UseSubRegLiveness("ppc-track-subreg-liveness",
cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden);
-static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned",
- cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
- cl::Hidden);
-
static cl::opt<bool>
EnableMachinePipeliner("ppc-enable-pipeliner",
cl::desc("Enable Machine Pipeliner for PPC"),
@@ -53,11 +53,19 @@ PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const PPCTargetMachine &TM)
- : PPCGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
+ : PPCGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), TargetTriple(TT),
IsPPC64(TargetTriple.getArch() == Triple::ppc64 ||
TargetTriple.getArch() == Triple::ppc64le),
TM(TM), FrameLowering(initializeSubtargetDependencies(CPU, FS)),
- InstrInfo(*this), TLInfo(TM, *this) {}
+ InstrInfo(*this), TLInfo(TM, *this) {
+ CallLoweringInfo.reset(new PPCCallLowering(*getTargetLowering()));
+ Legalizer.reset(new PPCLegalizerInfo(*this));
+ auto *RBI = new PPCRegisterBankInfo(*getRegisterInfo());
+ RegBankInfo.reset(RBI);
+
+ InstSelector.reset(createPPCInstructionSelector(
+ *static_cast<const PPCTargetMachine *>(&TM), *this, *RBI));
+}
void PPCSubtarget::initializeEnvironment() {
StackAlignment = Align(16);
@@ -69,8 +77,8 @@ void PPCSubtarget::initializeEnvironment() {
HasHardFloat = false;
HasAltivec = false;
HasSPE = false;
+ HasEFPU2 = false;
HasFPU = false;
- HasQPX = false;
HasVSX = false;
NeedsTwoConstNR = false;
HasP8Vector = false;
@@ -78,6 +86,7 @@ void PPCSubtarget::initializeEnvironment() {
HasP8Crypto = false;
HasP9Vector = false;
HasP9Altivec = false;
+ HasMMA = false;
HasP10Vector = false;
HasPrefixInstrs = false;
HasPCRelativeMemops = false;
@@ -109,10 +118,10 @@ void PPCSubtarget::initializeEnvironment() {
HasInvariantFunctionDescriptors = false;
HasPartwordAtomics = false;
HasDirectMove = false;
- IsQPXStackUnaligned = false;
HasHTM = false;
HasFloat128 = false;
HasFusion = false;
+ HasStoreFusion = false;
HasAddiLoadFusion = false;
HasAddisLoadFusion = false;
IsISA3_0 = false;
@@ -122,7 +131,10 @@ void PPCSubtarget::initializeEnvironment() {
VectorsUseTwoUnits = false;
UsePPCPreRASchedStrategy = false;
UsePPCPostRASchedStrategy = false;
+ PairedVectorMemops = false;
PredictableSelectIsExpensive = false;
+ HasModernAIXAs = false;
+ IsAIX = false;
HasPOPCNTD = POPCNTD_Unavailable;
}
@@ -144,7 +156,7 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
InstrItins = getInstrItineraryForCPU(CPUName);
// Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
+ ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS);
// If the user requested use of 64-bit regs, but the cpu selected doesn't
// support it, ignore.
@@ -158,7 +170,7 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (HasSPE && IsPPC64)
report_fatal_error( "SPE is only supported for 32-bit targets.\n", false);
- if (HasSPE && (HasAltivec || HasQPX || HasVSX || HasFPU))
+ if (HasSPE && (HasAltivec || HasVSX || HasFPU))
report_fatal_error(
"SPE and traditional floating point cannot both be enabled.\n", false);
@@ -166,15 +178,12 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (!HasSPE)
HasFPU = true;
- // QPX requires a 32-byte aligned stack. Note that we need to do this if
- // we're compiling for a BG/Q system regardless of whether or not QPX
- // is enabled because external functions will assume this alignment.
- IsQPXStackUnaligned = QPXStackUnaligned;
StackAlignment = getPlatformStackAlignment();
// Determine endianness.
// FIXME: Part of the TargetMachine.
- IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le);
+ IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le ||
+ TargetTriple.getArch() == Triple::ppcle);
}
bool PPCSubtarget::enableMachineScheduler() const { return true; }
@@ -235,3 +244,20 @@ bool PPCSubtarget::isUsingPCRelativeCalls() const {
return isPPC64() && hasPCRelativeMemops() && isELFv2ABI() &&
CodeModel::Medium == getTargetMachine().getCodeModel();
}
+
+// GlobalISEL
+const CallLowering *PPCSubtarget::getCallLowering() const {
+ return CallLoweringInfo.get();
+}
+
+const RegisterBankInfo *PPCSubtarget::getRegBankInfo() const {
+ return RegBankInfo.get();
+}
+
+const LegalizerInfo *PPCSubtarget::getLegalizerInfo() const {
+ return Legalizer.get();
+}
+
+InstructionSelector *PPCSubtarget::getInstructionSelector() const {
+ return InstSelector.get();
+}
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index ec329022c457..50d89390d5bc 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -17,6 +17,9 @@
#include "PPCISelLowering.h"
#include "PPCInstrInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -97,7 +100,7 @@ protected:
bool HasAltivec;
bool HasFPU;
bool HasSPE;
- bool HasQPX;
+ bool HasEFPU2;
bool HasVSX;
bool NeedsTwoConstNR;
bool HasP8Vector;
@@ -108,6 +111,7 @@ protected:
bool HasP10Vector;
bool HasPrefixInstrs;
bool HasPCRelativeMemops;
+ bool HasMMA;
bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -137,6 +141,7 @@ protected:
bool HasHTM;
bool HasFloat128;
bool HasFusion;
+ bool HasStoreFusion;
bool HasAddiLoadFusion;
bool HasAddisLoadFusion;
bool IsISA3_0;
@@ -146,21 +151,25 @@ protected:
bool VectorsUseTwoUnits;
bool UsePPCPreRASchedStrategy;
bool UsePPCPostRASchedStrategy;
+ bool PairedVectorMemops;
bool PredictableSelectIsExpensive;
+ bool HasModernAIXAs;
+ bool IsAIX;
POPCNTDKind HasPOPCNTD;
- /// When targeting QPX running a stock PPC64 Linux kernel where the stack
- /// alignment has not been changed, we need to keep the 16-byte alignment
- /// of the stack.
- bool IsQPXStackUnaligned;
-
const PPCTargetMachine &TM;
PPCFrameLowering FrameLowering;
PPCInstrInfo InstrInfo;
PPCTargetLowering TLInfo;
SelectionDAGTargetInfo TSInfo;
+ /// GlobalISel related APIs.
+ std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
@@ -170,16 +179,13 @@ public:
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
/// function for this subtarget.
Align getStackAlignment() const { return StackAlignment; }
- /// getDarwinDirective - Returns the -m directive specified for the cpu.
- unsigned getDarwinDirective() const { return CPUDirective; }
-
/// getCPUDirective - Returns the -m directive specified for the cpu.
///
unsigned getCPUDirective() const { return CPUDirective; }
@@ -254,8 +260,8 @@ public:
bool hasFPCVT() const { return HasFPCVT; }
bool hasAltivec() const { return HasAltivec; }
bool hasSPE() const { return HasSPE; }
+ bool hasEFPU2() const { return HasEFPU2; }
bool hasFPU() const { return HasFPU; }
- bool hasQPX() const { return HasQPX; }
bool hasVSX() const { return HasVSX; }
bool needsTwoConstNR() const { return NeedsTwoConstNR; }
bool hasP8Vector() const { return HasP8Vector; }
@@ -266,6 +272,8 @@ public:
bool hasP10Vector() const { return HasP10Vector; }
bool hasPrefixInstrs() const { return HasPrefixInstrs; }
bool hasPCRelativeMemops() const { return HasPCRelativeMemops; }
+ bool hasMMA() const { return HasMMA; }
+ bool pairedVectorMemops() const { return PairedVectorMemops; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasBPERMD() const { return HasBPERMD; }
@@ -291,11 +299,7 @@ public:
bool hasPartwordAtomics() const { return HasPartwordAtomics; }
bool hasDirectMove() const { return HasDirectMove; }
- bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
Align getPlatformStackAlignment() const {
- if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned())
- return Align(32);
-
return Align(16);
}
@@ -315,6 +319,7 @@ public:
bool isISA3_1() const { return IsISA3_1; }
bool useLongCalls() const { return UseLongCalls; }
bool hasFusion() const { return HasFusion; }
+ bool hasStoreFusion() const { return HasStoreFusion; }
bool hasAddiLoadFusion() const { return HasAddiLoadFusion; }
bool hasAddisLoadFusion() const { return HasAddisLoadFusion; }
bool needsSwapsForVSXMemOps() const {
@@ -325,9 +330,6 @@ public:
const Triple &getTargetTriple() const { return TargetTriple; }
- /// isBGQ - True if this is a BG/Q platform.
- bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
-
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
@@ -404,6 +406,12 @@ public:
bool isPredictableSelectIsExpensive() const {
return PredictableSelectIsExpensive;
}
+
+ // GlobalISEL
+ const CallLowering *getCallLowering() const override;
+ const RegisterBankInfo *getRegBankInfo() const override;
+ const LegalizerInfo *getLegalizerInfo() const override;
+ InstructionSelector *getInstructionSelector() const override;
};
} // End llvm namespace
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 4b809e0c8553..43dcc5844c4e 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -50,16 +50,17 @@ protected:
bool Changed = false;
bool NeedFence = true;
bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
+ bool IsPCREL = false;
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
I != IE;) {
MachineInstr &MI = *I;
+ IsPCREL = isPCREL(MI);
if (MI.getOpcode() != PPC::ADDItlsgdLADDR &&
MI.getOpcode() != PPC::ADDItlsldLADDR &&
MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
- MI.getOpcode() != PPC::ADDItlsldLADDR32) {
-
+ MI.getOpcode() != PPC::ADDItlsldLADDR32 && !IsPCREL) {
// Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
// as scheduling fences, we skip creating fences if we already
// have existing ADJCALLSTACKDOWN/UP to avoid nesting,
@@ -76,12 +77,16 @@ protected:
LLVM_DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << MI);
Register OutReg = MI.getOperand(0).getReg();
- Register InReg = MI.getOperand(1).getReg();
- DebugLoc DL = MI.getDebugLoc();
+ Register InReg = PPC::NoRegister;
Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
- unsigned Opc1, Opc2;
- const Register OrigRegs[] = {OutReg, InReg, GPR3};
+ SmallVector<Register, 3> OrigRegs = {OutReg, GPR3};
+ if (!IsPCREL) {
+ InReg = MI.getOperand(1).getReg();
+ OrigRegs.push_back(InReg);
+ }
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Opc1, Opc2;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Opcode inconsistency error");
@@ -101,6 +106,13 @@ protected:
Opc1 = PPC::ADDItlsldL32;
Opc2 = PPC::GETtlsldADDR32;
break;
+ case PPC::PADDI8pc:
+ assert(IsPCREL && "Expecting General/Local Dynamic PCRel");
+ Opc1 = PPC::PADDI8pc;
+ Opc2 = MI.getOperand(2).getTargetFlags() ==
+ PPCII::MO_GOT_TLSGD_PCREL_FLAG
+ ? PPC::GETtlsADDRPCREL
+ : PPC::GETtlsldADDRPCREL;
}
// We create ADJCALLSTACKUP and ADJCALLSTACKDOWN around _tls_get_addr
@@ -113,9 +125,15 @@ protected:
BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0)
.addImm(0);
- // Expand into two ops built prior to the existing instruction.
- MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3)
- .addReg(InReg);
+ MachineInstr *Addi;
+ if (IsPCREL) {
+ Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addImm(0);
+ } else {
+ // Expand into two ops built prior to the existing instruction.
+ assert(InReg != PPC::NoRegister && "Operand must be a register");
+ Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addReg(InReg);
+ }
+
Addi->addOperand(MI.getOperand(2));
// The ADDItls* instruction is the first instruction in the
@@ -125,7 +143,10 @@ protected:
MachineInstr *Call = (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
.addReg(GPR3));
- Call->addOperand(MI.getOperand(3));
+ if (IsPCREL)
+ Call->addOperand(MI.getOperand(2));
+ else
+ Call->addOperand(MI.getOperand(3));
if (NeedFence)
BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0);
@@ -150,6 +171,14 @@ protected:
}
public:
+ bool isPCREL(const MachineInstr &MI) {
+ return (MI.getOpcode() == PPC::PADDI8pc) &&
+ (MI.getOperand(2).getTargetFlags() ==
+ PPCII::MO_GOT_TLSGD_PCREL_FLAG ||
+ MI.getOperand(2).getTargetFlags() ==
+ PPCII::MO_GOT_TLSLD_PCREL_FLAG);
+ }
+
bool runOnMachineFunction(MachineFunction &MF) override {
TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
LIS = &getAnalysis<LiveIntervals>();
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index f15f9c7f4942..0634833e64dc 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -24,12 +24,18 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/Localizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
@@ -64,10 +70,6 @@ opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
cl::desc("Disable VSX Swap Removal for PPC"));
static cl::
-opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden,
- cl::desc("Disable QPX load splat simplification"));
-
-static cl::
opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
cl::desc("Disable machine peepholes for PPC"));
@@ -98,8 +100,9 @@ static cl::opt<bool>
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
- RegisterTargetMachine<PPCTargetMachine> B(getThePPC64Target());
- RegisterTargetMachine<PPCTargetMachine> C(getThePPC64LETarget());
+ RegisterTargetMachine<PPCTargetMachine> B(getThePPC32LETarget());
+ RegisterTargetMachine<PPCTargetMachine> C(getThePPC64Target());
+ RegisterTargetMachine<PPCTargetMachine> D(getThePPC64LETarget());
PassRegistry &PR = *PassRegistry::getPassRegistry();
#ifndef NDEBUG
@@ -114,13 +117,13 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
initializePPCReduceCRLogicalsPass(PR);
initializePPCBSelPass(PR);
initializePPCBranchCoalescingPass(PR);
- initializePPCQPXLoadSplatPass(PR);
initializePPCBoolRetToIntPass(PR);
initializePPCExpandISELPass(PR);
initializePPCPreEmitPeepholePass(PR);
initializePPCTLSDynamicCallPass(PR);
initializePPCMIPeepholePass(PR);
initializePPCLowerMASSVEntriesPass(PR);
+ initializeGlobalISel(PR);
}
/// Return the datalayout string of a subtarget.
@@ -128,8 +131,8 @@ static std::string getDataLayoutString(const Triple &T) {
bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
std::string Ret;
- // Most PPC* platforms are big endian, PPC64LE is little endian.
- if (T.getArch() == Triple::ppc64le)
+ // Most PPC* platforms are big endian, PPC(64)LE is little endian.
+ if (T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle)
Ret = "e";
else
Ret = "E";
@@ -143,10 +146,7 @@ static std::string getDataLayoutString(const Triple &T) {
// Note, the alignment values for f64 and i64 on ppc64 in Darwin
// documentation are wrong; these are correct (i.e. "what gcc does").
- if (is64Bit || !T.isOSDarwin())
- Ret += "-i64:64";
- else
- Ret += "-f64:32:64";
+ Ret += "-i64:64";
// PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
if (is64Bit)
@@ -154,6 +154,13 @@ static std::string getDataLayoutString(const Triple &T) {
else
Ret += "-n32";
+ // Specify the vector alignment explicitly. For v256i1 and v512i1, the
+ // calculated alignment would be 256*alignment(i1) and 512*alignment(i1),
+ // which is 256 and 512 bytes - way over aligned.
+ if ((T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppc64) &&
+ (T.isOSAIX() || T.isOSLinux()))
+ Ret += "-v256:256:256-v512:512:512";
+
return Ret;
}
@@ -183,13 +190,17 @@ static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
FullFS = "+invariant-function-descriptors";
}
+ if (TT.isOSAIX()) {
+ if (!FullFS.empty())
+ FullFS = "+aix," + FullFS;
+ else
+ FullFS = "+aix";
+ }
+
return FullFS;
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
- if (TT.isOSDarwin())
- return std::make_unique<TargetLoweringObjectFileMachO>();
-
if (TT.isOSAIX())
return std::make_unique<TargetLoweringObjectFileXCOFF>();
@@ -198,9 +209,6 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
const TargetOptions &Options) {
- if (TT.isOSDarwin())
- report_fatal_error("Darwin is no longer supported for PowerPC");
-
if (Options.MCOptions.getABIName().startswith("elfv1"))
return PPCTargetMachine::PPC_ABI_ELFv1;
else if (Options.MCOptions.getABIName().startswith("elfv2"))
@@ -230,10 +238,6 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
if (RM.hasValue())
return *RM;
- // Darwin defaults to dynamic-no-pic.
- if (TT.isOSDarwin())
- return Reloc::DynamicNoPIC;
-
// Big Endian PPC and AIX default to PIC.
if (TT.getArch() == Triple::ppc64 || TT.isOSAIX())
return Reloc::PIC_;
@@ -276,6 +280,8 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
std::make_unique<GenericScheduler>(C));
// add DAG Mutations here.
DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+ if (ST.hasStoreFusion())
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
if (ST.hasFusion())
DAG->addMutation(createPowerPCMacroFusionDAGMutation());
@@ -290,6 +296,8 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler(
std::make_unique<PPCPostRASchedStrategy>(C) :
std::make_unique<PostGenericScheduler>(C), true);
// add DAG Mutations here.
+ if (ST.hasStoreFusion())
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
if (ST.hasFusion())
DAG->addMutation(createPowerPCMacroFusionDAGMutation());
return DAG;
@@ -321,12 +329,10 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
- std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
- ? CPUAttr.getValueAsString().str()
- : TargetCPU;
- std::string FS = !FSAttr.hasAttribute(Attribute::None)
- ? FSAttr.getValueAsString().str()
- : TargetFS;
+ std::string CPU =
+ CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string FS =
+ FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
// FIXME: This is related to the code below to reset the target options,
// we need to know whether or not the soft float flag is set on the
@@ -388,6 +394,12 @@ public:
void addPreRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
+ // GlobalISEL
+ bool addIRTranslator() override;
+ bool addLegalizeMachineIR() override;
+ bool addRegBankSelect() override;
+ bool addGlobalInstructionSelect() override;
+
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override {
return createPPCMachineScheduler(C);
@@ -411,14 +423,9 @@ void PPCPassConfig::addIRPasses() {
// Lower generic MASSV routines to PowerPC subtarget-specific entries.
addPass(createPPCLowerMASSVEntriesPass());
-
- // For the BG/Q (or if explicitly requested), add explicit data prefetch
- // intrinsics.
- bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
- getOptLevel() != CodeGenOpt::None;
+
+ // If explicitly requested, add explicit data prefetch intrinsics.
if (EnablePrefetch.getNumOccurrences() > 0)
- UsePrefetching = EnablePrefetch;
- if (UsePrefetching)
addPass(createLoopDataPrefetchPass());
if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) {
@@ -515,15 +522,8 @@ void PPCPassConfig::addPreRegAlloc() {
}
void PPCPassConfig::addPreSched2() {
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
-
- // This optimization must happen after anything that might do store-to-load
- // forwarding. Here we're after RA (and, thus, when spills are inserted)
- // but before post-RA scheduling.
- if (!DisableQPXLoadSplat)
- addPass(createPPCQPXLoadSplatPass());
- }
}
void PPCPassConfig::addPreEmitPass() {
@@ -550,3 +550,24 @@ static MachineSchedRegistry
PPCPostRASchedRegistry("ppc-postra",
"Run PowerPC PostRA specific scheduler",
createPPCPostMachineScheduler);
+
+// Global ISEL
+bool PPCPassConfig::addIRTranslator() {
+ addPass(new IRTranslator());
+ return false;
+}
+
+bool PPCPassConfig::addLegalizeMachineIR() {
+ addPass(new Legalizer());
+ return false;
+}
+
+bool PPCPassConfig::addRegBankSelect() {
+ addPass(new RegBankSelect());
+ return false;
+}
+
+bool PPCPassConfig::addGlobalInstructionSelect() {
+ addPass(new InstructionSelect());
+ return false;
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index fd1d14ae32d4..21faa4e710e3 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -58,6 +58,11 @@ public:
const Triple &TT = getTargetTriple();
return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le);
};
+
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+ // Addrspacecasts are always noops.
+ return true;
+ }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 53556ffc267d..b3d8100fe016 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -8,13 +8,19 @@
#include "PPCTargetTransformInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
+#include "llvm/Transforms/Utils/Local.h"
+
using namespace llvm;
#define DEBUG_TYPE "ppctti"
@@ -22,8 +28,7 @@ using namespace llvm;
static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
-// This is currently only used for the data prefetch pass which is only enabled
-// for BG/Q by default.
+// This is currently only used for the data prefetch pass
static cl::opt<unsigned>
CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
cl::desc("The loop prefetch cache line size"));
@@ -59,6 +64,109 @@ PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
+Optional<Instruction *>
+PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
+ Intrinsic::ID IID = II.getIntrinsicID();
+ switch (IID) {
+ default:
+ break;
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ // Turn PPC lvx -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(
+ II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
+ &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
+ Value *Ptr = IC.Builder.CreateBitCast(
+ II.getArgOperand(0), PointerType::getUnqual(II.getType()));
+ return new LoadInst(II.getType(), Ptr, "", false, Align(16));
+ }
+ break;
+ case Intrinsic::ppc_vsx_lxvw4x:
+ case Intrinsic::ppc_vsx_lxvd2x: {
+ // Turn PPC VSX loads into normal loads.
+ Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(0),
+ PointerType::getUnqual(II.getType()));
+ return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1));
+ }
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ // Turn stvx -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(
+ II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
+ &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
+ Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
+ Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
+ return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16));
+ }
+ break;
+ case Intrinsic::ppc_vsx_stxvw4x:
+ case Intrinsic::ppc_vsx_stxvd2x: {
+ // Turn PPC VSX stores into normal stores.
+ Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
+ Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
+ return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
+ }
+ case Intrinsic::ppc_altivec_vperm:
+ // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+ // Note that ppc_altivec_vperm has a big-endian bias, so when creating
+ // a vectorshuffle for little endian, we must undo the transformation
+ // performed on vec_perm in altivec.h. That is, we must complement
+ // the permutation mask with respect to 31 and reverse the order of
+ // V1 and V2.
+ if (Constant *Mask = dyn_cast<Constant>(II.getArgOperand(2))) {
+ assert(cast<FixedVectorType>(Mask->getType())->getNumElements() == 16 &&
+ "Bad type for intrinsic!");
+
+ // Check that all of the elements are integer constants or undefs.
+ bool AllEltsOk = true;
+ for (unsigned i = 0; i != 16; ++i) {
+ Constant *Elt = Mask->getAggregateElement(i);
+ if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
+ AllEltsOk = false;
+ break;
+ }
+ }
+
+ if (AllEltsOk) {
+ // Cast the input vectors to byte vectors.
+ Value *Op0 =
+ IC.Builder.CreateBitCast(II.getArgOperand(0), Mask->getType());
+ Value *Op1 =
+ IC.Builder.CreateBitCast(II.getArgOperand(1), Mask->getType());
+ Value *Result = UndefValue::get(Op0->getType());
+
+ // Only extract each element once.
+ Value *ExtractedElts[32];
+ memset(ExtractedElts, 0, sizeof(ExtractedElts));
+
+ for (unsigned i = 0; i != 16; ++i) {
+ if (isa<UndefValue>(Mask->getAggregateElement(i)))
+ continue;
+ unsigned Idx =
+ cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
+ Idx &= 31; // Match the hardware behavior.
+ if (DL.isLittleEndian())
+ Idx = 31 - Idx;
+
+ if (!ExtractedElts[Idx]) {
+ Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
+ Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
+ ExtractedElts[Idx] = IC.Builder.CreateExtractElement(
+ Idx < 16 ? Op0ToUse : Op1ToUse, IC.Builder.getInt32(Idx & 15));
+ }
+
+ // Insert this value into the result vector.
+ Result = IC.Builder.CreateInsertElement(Result, ExtractedElts[Idx],
+ IC.Builder.getInt32(i));
+ }
+ return CastInst::Create(Instruction::BitCast, Result, II.getType());
+ }
+ }
+ break;
+ }
+ return None;
+}
+
int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) {
if (DisablePPCConstHoist)
@@ -126,9 +234,10 @@ int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind) {
+ TTI::TargetCostKind CostKind,
+ Instruction *Inst) {
if (DisablePPCConstHoist)
- return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind);
+ return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);
assert(Ty->isIntegerTy());
@@ -226,6 +335,29 @@ PPCTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
return BaseT::getUserCost(U, Operands, CostKind);
}
+// Determining the address of a TLS variable results in a function call in
+// certain TLS models.
+static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM,
+ SmallPtrSetImpl<const Value *> &Visited) {
+ // No need to traverse again if we already checked this operand.
+ if (!Visited.insert(MemAddr).second)
+ return false;
+ const auto *GV = dyn_cast<GlobalValue>(MemAddr);
+ if (!GV) {
+ // Recurse to check for constants that refer to TLS global variables.
+ if (const auto *CV = dyn_cast<Constant>(MemAddr))
+ for (const auto &CO : CV->operands())
+ if (memAddrUsesCTR(CO, TM, Visited))
+ return true;
+ return false;
+ }
+
+ if (!GV->isThreadLocal())
+ return false;
+ TLSModel::Model Model = TM.getTLSModel(GV);
+ return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
+}
+
bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
SmallPtrSetImpl<const Value *> &Visited) {
const PPCTargetMachine &TM = ST->getTargetMachine();
@@ -244,31 +376,6 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
return false;
};
- // Determining the address of a TLS variable results in a function call in
- // certain TLS models.
- std::function<bool(const Value *)> memAddrUsesCTR =
- [&memAddrUsesCTR, &TM, &Visited](const Value *MemAddr) -> bool {
- // No need to traverse again if we already checked this operand.
- if (!Visited.insert(MemAddr).second)
- return false;
- const auto *GV = dyn_cast<GlobalValue>(MemAddr);
- if (!GV) {
- // Recurse to check for constants that refer to TLS global variables.
- if (const auto *CV = dyn_cast<Constant>(MemAddr))
- for (const auto &CO : CV->operands())
- if (memAddrUsesCTR(CO))
- return true;
-
- return false;
- }
-
- if (!GV->isThreadLocal())
- return false;
- TLSModel::Model Model = TM.getTLSModel(GV);
- return Model == TLSModel::GeneralDynamic ||
- Model == TLSModel::LocalDynamic;
- };
-
auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
@@ -276,8 +383,34 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
return false;
};
+ auto supportedHalfPrecisionOp = [](Instruction *Inst) {
+ switch (Inst->getOpcode()) {
+ default:
+ return false;
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::FPToUI:
+ case Instruction::UIToFP:
+ case Instruction::FPToSI:
+ case Instruction::SIToFP:
+ return true;
+ }
+ };
+
for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
J != JE; ++J) {
+ // There are no direct operations on half precision so assume that
+ // anything with that type requires a call except for a few select
+ // operations with Power9.
+ if (Instruction *CurrInst = dyn_cast<Instruction>(J)) {
+ for (const auto &Op : CurrInst->operands()) {
+ if (Op->getType()->getScalarType()->isHalfTy() ||
+ CurrInst->getType()->getScalarType()->isHalfTy())
+ return !(ST->isISA3_0() && supportedHalfPrecisionOp(CurrInst));
+ }
+ }
if (CallInst *CI = dyn_cast<CallInst>(J)) {
// Inline ASM is okay, unless it clobbers the ctr register.
if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand())) {
@@ -299,6 +432,30 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
case Intrinsic::loop_decrement:
return true;
+ // Binary operations on 128-bit value will use CTR.
+ case Intrinsic::experimental_constrained_fadd:
+ case Intrinsic::experimental_constrained_fsub:
+ case Intrinsic::experimental_constrained_fmul:
+ case Intrinsic::experimental_constrained_fdiv:
+ case Intrinsic::experimental_constrained_frem:
+ if (F->getType()->getScalarType()->isFP128Ty() ||
+ F->getType()->getScalarType()->isPPC_FP128Ty())
+ return true;
+ break;
+
+ case Intrinsic::experimental_constrained_fptosi:
+ case Intrinsic::experimental_constrained_fptoui:
+ case Intrinsic::experimental_constrained_sitofp:
+ case Intrinsic::experimental_constrained_uitofp: {
+ Type *SrcType = CI->getArgOperand(0)->getType()->getScalarType();
+ Type *DstType = CI->getType()->getScalarType();
+ if (SrcType->isPPC_FP128Ty() || DstType->isPPC_FP128Ty() ||
+ isLargeIntegerTy(!TM.isPPC64(), SrcType) ||
+ isLargeIntegerTy(!TM.isPPC64(), DstType))
+ return true;
+ break;
+ }
+
// Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
// because, although it does clobber the counter register, the
// control can't then return to inside the loop unless there is also
@@ -317,6 +474,15 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
case Intrinsic::pow:
case Intrinsic::sin:
case Intrinsic::cos:
+ case Intrinsic::experimental_constrained_powi:
+ case Intrinsic::experimental_constrained_log:
+ case Intrinsic::experimental_constrained_log2:
+ case Intrinsic::experimental_constrained_log10:
+ case Intrinsic::experimental_constrained_exp:
+ case Intrinsic::experimental_constrained_exp2:
+ case Intrinsic::experimental_constrained_pow:
+ case Intrinsic::experimental_constrained_sin:
+ case Intrinsic::experimental_constrained_cos:
return true;
case Intrinsic::copysign:
if (CI->getArgOperand(0)->getType()->getScalarType()->
@@ -338,6 +504,54 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
case Intrinsic::llround: Opcode = ISD::LLROUND; break;
case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
+ case Intrinsic::experimental_constrained_fcmp:
+ Opcode = ISD::STRICT_FSETCC;
+ break;
+ case Intrinsic::experimental_constrained_fcmps:
+ Opcode = ISD::STRICT_FSETCCS;
+ break;
+ case Intrinsic::experimental_constrained_fma:
+ Opcode = ISD::STRICT_FMA;
+ break;
+ case Intrinsic::experimental_constrained_sqrt:
+ Opcode = ISD::STRICT_FSQRT;
+ break;
+ case Intrinsic::experimental_constrained_floor:
+ Opcode = ISD::STRICT_FFLOOR;
+ break;
+ case Intrinsic::experimental_constrained_ceil:
+ Opcode = ISD::STRICT_FCEIL;
+ break;
+ case Intrinsic::experimental_constrained_trunc:
+ Opcode = ISD::STRICT_FTRUNC;
+ break;
+ case Intrinsic::experimental_constrained_rint:
+ Opcode = ISD::STRICT_FRINT;
+ break;
+ case Intrinsic::experimental_constrained_lrint:
+ Opcode = ISD::STRICT_LRINT;
+ break;
+ case Intrinsic::experimental_constrained_llrint:
+ Opcode = ISD::STRICT_LLRINT;
+ break;
+ case Intrinsic::experimental_constrained_nearbyint:
+ Opcode = ISD::STRICT_FNEARBYINT;
+ break;
+ case Intrinsic::experimental_constrained_round:
+ Opcode = ISD::STRICT_FROUND;
+ break;
+ case Intrinsic::experimental_constrained_lround:
+ Opcode = ISD::STRICT_LROUND;
+ break;
+ case Intrinsic::experimental_constrained_llround:
+ Opcode = ISD::STRICT_LLROUND;
+ break;
+ case Intrinsic::experimental_constrained_minnum:
+ Opcode = ISD::STRICT_FMINNUM;
+ break;
+ case Intrinsic::experimental_constrained_maxnum:
+ Opcode = ISD::STRICT_FMAXNUM;
+ break;
case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
}
@@ -486,7 +700,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
}
for (Value *Operand : J->operands())
- if (memAddrUsesCTR(Operand))
+ if (memAddrUsesCTR(Operand, TM, Visited))
return true;
}
@@ -546,6 +760,24 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
}
}
+ // If an exit block has a PHI that accesses a TLS variable as one of the
+ // incoming values from the loop, we cannot produce a CTR loop because the
+ // address for that value will be computed in the loop.
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ for (auto &BB : ExitBlocks) {
+ for (auto &PHI : BB->phis()) {
+ for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx;
+ Idx++) {
+ const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx);
+ const Value *IncomingValue = PHI.getIncomingValue(Idx);
+ if (L->contains(IncomingBB) &&
+ memAddrUsesCTR(IncomingValue, TM, Visited))
+ return false;
+ }
+ }
+ }
+
LLVMContext &C = L->getHeader()->getContext();
HWLoopInfo.CountType = TM.isPPC64() ?
Type::getInt64Ty(C) : Type::getInt32Ty(C);
@@ -581,10 +813,7 @@ bool PPCTTIImpl::useColdCCForColdCall(Function &F) {
}
bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
- // On the A2, always unroll aggressively. For QPX unaligned loads, we depend
- // on combining the loads generated for consecutive accesses, and failure to
- // do so is particularly expensive. This makes it much more likely (compared
- // to only using concatenation unrolling).
+ // On the A2, always unroll aggressively.
if (ST->getCPUDirective() == PPC::DIR_A2)
return true;
@@ -644,7 +873,6 @@ const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {
unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const {
if (Vector) {
- if (ST->hasQPX()) return 256;
if (ST->hasAltivec()) return 128;
return 0;
}
@@ -673,8 +901,6 @@ unsigned PPCTTIImpl::getCacheLineSize() const {
}
unsigned PPCTTIImpl::getPrefetchDistance() const {
- // This seems like a reasonable default for the BG/Q (this pass is enabled, by
- // default, only on the BG/Q).
return 300;
}
@@ -763,7 +989,7 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
+ // PPC, for both Altivec/VSX, support cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant
// instruction). We need one such shuffle instruction for each actual
// register (this is not true for arbitrary shuffles, but is true for the
@@ -780,11 +1006,12 @@ int PPCTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
}
int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
- int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
+ int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
Cost = vectorCostAdjustment(Cost, Opcode, Dst, Src);
// TODO: Allow non-throughput costs that aren't binary.
if (CostKind != TTI::TCK_RecipThroughput)
@@ -793,9 +1020,11 @@ int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
}
int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) {
- int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+ int Cost =
+ BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
return Cost;
@@ -819,13 +1048,6 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return Cost;
- } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
- // Floating point scalars are already located in index #0.
- if (Index == 0)
- return 0;
-
- return Cost;
-
} else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
if (ST->hasP9Altivec()) {
if (ISD == ISD::INSERT_VECTOR_ELT)
@@ -849,7 +1071,7 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
// The cost of the load constant for a vector extract is disregarded
// (invariant, easily schedulable).
return vectorCostAdjustment(1, Opcode, Val, nullptr);
-
+
} else if (ST->hasDirectMove())
// Assume permute has standard cost.
// Assume move-to/move-from VSR have 2x standard cost.
@@ -900,8 +1122,6 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
bool IsVSXType = ST->hasVSX() &&
(LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
- bool IsQPXType = ST->hasQPX() &&
- (LT.second == MVT::v4f64 || LT.second == MVT::v4f32);
// VSX has 32b/64b load instructions. Legalization can handle loading of
// 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
@@ -924,8 +1144,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// for Altivec types using the VSX instructions, but that's more expensive
// than using the permutation-based load sequence. On the P8, that's no
// longer true.
- if (Opcode == Instruction::Load &&
- ((!ST->hasP8Vector() && IsAltivecType) || IsQPXType) &&
+ if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&
*Alignment >= LT.second.getScalarType().getStoreSize())
return Cost + LT.first; // Add the cost of the permutations.
@@ -978,7 +1197,7 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(
getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
CostKind);
- // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
+ // PPC, for both Altivec/VSX, support cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant
// instruction). For each result vector, we need one shuffle per incoming
// vector (except that the first shuffle can take two incoming vectors
@@ -1028,3 +1247,51 @@ bool PPCTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
else
return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
}
+
+bool PPCTTIImpl::isNumRegsMajorCostOfLSR() {
+ return false;
+}
+
+bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
+ MemIntrinsicInfo &Info) {
+ switch (Inst->getIntrinsicID()) {
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::ppc_altivec_lvebx:
+ case Intrinsic::ppc_altivec_lvehx:
+ case Intrinsic::ppc_altivec_lvewx:
+ case Intrinsic::ppc_vsx_lxvd2x:
+ case Intrinsic::ppc_vsx_lxvw4x:
+ case Intrinsic::ppc_vsx_lxvd2x_be:
+ case Intrinsic::ppc_vsx_lxvw4x_be:
+ case Intrinsic::ppc_vsx_lxvl:
+ case Intrinsic::ppc_vsx_lxvll:
+ case Intrinsic::ppc_vsx_lxvp: {
+ Info.PtrVal = Inst->getArgOperand(0);
+ Info.ReadMem = true;
+ Info.WriteMem = false;
+ return true;
+ }
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ case Intrinsic::ppc_altivec_stvebx:
+ case Intrinsic::ppc_altivec_stvehx:
+ case Intrinsic::ppc_altivec_stvewx:
+ case Intrinsic::ppc_vsx_stxvd2x:
+ case Intrinsic::ppc_vsx_stxvw4x:
+ case Intrinsic::ppc_vsx_stxvd2x_be:
+ case Intrinsic::ppc_vsx_stxvw4x_be:
+ case Intrinsic::ppc_vsx_stxvl:
+ case Intrinsic::ppc_vsx_stxvll:
+ case Intrinsic::ppc_vsx_stxvp: {
+ Info.PtrVal = Inst->getArgOperand(1);
+ Info.ReadMem = false;
+ Info.WriteMem = true;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index d998521084e1..bc946715156f 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -41,6 +41,9 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
+ Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) const;
+
/// \name Scalar TTI Implementations
/// @{
@@ -49,7 +52,8 @@ public:
TTI::TargetCostKind CostKind);
int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty, TTI::TargetCostKind CostKind);
+ Type *Ty, TTI::TargetCostKind CostKind,
+ Instruction *Inst = nullptr);
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty, TTI::TargetCostKind CostKind);
@@ -64,12 +68,14 @@ public:
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo);
+ bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2);
+ bool isNumRegsMajorCostOfLSR();
/// @}
@@ -103,10 +109,11 @@ public:
const Instruction *CxtI = nullptr);
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- TTI::TargetCostKind CostKind,
+ TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
diff --git a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 3e6d1c7939f1..e72e29112da7 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -315,9 +315,9 @@ protected:
// Extend the live interval of the addend source (it might end at the
// copy to be removed, or somewhere in between there and here). This
// is necessary only if it is a physical register.
- if (!Register::isVirtualRegister(AddendSrcReg))
- for (MCRegUnitIterator Units(AddendSrcReg, TRI); Units.isValid();
- ++Units) {
+ if (!AddendSrcReg.isVirtual())
+ for (MCRegUnitIterator Units(AddendSrcReg.asMCReg(), TRI);
+ Units.isValid(); ++Units) {
unsigned Unit = *Units;
LiveRange &AddendSrcRange = LIS->getRegUnit(Unit);
diff --git a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index c3729da0b07b..ff251f55afff 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -254,10 +254,11 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
- if (isAnyVecReg(Reg, Partial)) {
+ // All operands need to be checked because there are instructions that
+ // operate on a partial register and produce a full register (such as
+ // XXPERMDIs).
+ if (isAnyVecReg(Reg, Partial))
RelevantInstr = true;
- break;
- }
}
if (!RelevantInstr)
@@ -689,6 +690,29 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
LLVM_DEBUG(UseMI.dump());
LLVM_DEBUG(dbgs() << "\n");
}
+
+ // It is possible that the load feeds a swap and that swap feeds a
+ // store. In such a case, the code is actually trying to store a swapped
+ // vector. We must reject such webs.
+ if (SwapVector[UseIdx].IsSwap && !SwapVector[UseIdx].IsLoad &&
+ !SwapVector[UseIdx].IsStore) {
+ Register SwapDefReg = UseMI.getOperand(0).getReg();
+ for (MachineInstr &UseOfUseMI :
+ MRI->use_nodbg_instructions(SwapDefReg)) {
+ int UseOfUseIdx = SwapMap[&UseOfUseMI];
+ if (SwapVector[UseOfUseIdx].IsStore) {
+ SwapVector[Repr].WebRejected = 1;
+ LLVM_DEBUG(
+ dbgs() << format(
+ "Web %d rejected for load/swap feeding a store\n", Repr));
+ LLVM_DEBUG(dbgs() << " def " << EntryIdx << ": ");
+ LLVM_DEBUG(MI->dump());
+ LLVM_DEBUG(dbgs() << " use " << UseIdx << ": ");
+ LLVM_DEBUG(UseMI.dump());
+ LLVM_DEBUG(dbgs() << "\n");
+ }
+ }
+ }
}
// Reject webs that contain swapping stores that are fed by something
diff --git a/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 649bd648a6cf..6bb952f27fee 100644
--- a/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -14,6 +14,10 @@ Target &llvm::getThePPC32Target() {
static Target ThePPC32Target;
return ThePPC32Target;
}
+Target &llvm::getThePPC32LETarget() {
+ static Target ThePPC32LETarget;
+ return ThePPC32LETarget;
+}
Target &llvm::getThePPC64Target() {
static Target ThePPC64Target;
return ThePPC64Target;
@@ -24,9 +28,12 @@ Target &llvm::getThePPC64LETarget() {
}
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetInfo() {
- RegisterTarget<Triple::ppc, /*HasJIT=*/true> X(getThePPC32Target(), "ppc32",
+ RegisterTarget<Triple::ppc, /*HasJIT=*/true> W(getThePPC32Target(), "ppc32",
"PowerPC 32", "PPC");
+ RegisterTarget<Triple::ppcle, /*HasJIT=*/true> X(
+ getThePPC32LETarget(), "ppc32le", "PowerPC 32 LE", "PPC");
+
RegisterTarget<Triple::ppc64, /*HasJIT=*/true> Y(getThePPC64Target(), "ppc64",
"PowerPC 64", "PPC");
diff --git a/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h b/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h
index 2d0afbfb1be0..f9d20ef00df8 100644
--- a/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h
+++ b/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h
@@ -14,6 +14,7 @@ namespace llvm {
class Target;
Target &getThePPC32Target();
+Target &getThePPC32LETarget();
Target &getThePPC64Target();
Target &getThePPC64LETarget();