author     Dimitry Andric <dim@FreeBSD.org>          2016-08-16 21:02:59 +0000
committer  Dimitry Andric <dim@FreeBSD.org>          2016-08-16 21:02:59 +0000
commit     3ca95b020283db6244cab92ede73c969253b6a31 (patch)
tree       d16e791e58694facd8f68d3e2797a1eaa8018afc /contrib/llvm/lib/Target/PowerPC
parent     27067774dce3388702a4cf744d7096c6fb71b688 (diff)
parent     c3aee98e721333f265a88d6bf348e6e468f027d4 (diff)
Update llvm to release_39 branch r276489, and resolve conflicts.
Notes:
svn path=/projects/clang390-import/; revision=304240
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC')
63 files changed, 5127 insertions, 2160 deletions
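Among the changes below, the disassembler and MC code emitter gain a new "memrix16" memory-operand form for 16-byte-aligned displacements: getMemRIX16Encoding() packs the displacement, scaled down by 16, into the low 12 bits of the operand field and the base register number into the next 5 bits, and decodeMemRIX16Operands() reverses this. A minimal standalone sketch of that bit layout (the helper names here are illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

// Illustrative helpers (not LLVM API) mirroring the memrix16 layout used by
// getMemRIX16Encoding()/decodeMemRIX16Operands() in the hunks below:
// bits 0-11 hold the displacement divided by 16, bits 12-16 the base register.
uint32_t packMemRIX16(int64_t Disp, unsigned BaseReg) {
  assert((Disp & 15) == 0 && "displacement must be 16-byte aligned");
  assert(BaseReg < 32 && "invalid base register");
  return (BaseReg << 12) | (static_cast<uint32_t>(Disp >> 4) & 0xFFF);
}

void unpackMemRIX16(uint32_t Imm, int64_t &Disp, unsigned &BaseReg) {
  BaseReg = Imm >> 12;
  // Re-scale and sign-extend, as the decoder's SignExtend64<16>(Disp << 4) does.
  Disp = static_cast<int16_t>((Imm & 0xFFF) << 4);
}

Note the decoder's sign extension: after re-scaling, the displacement is a signed 16-bit value, so the reachable range is -32768 to +32752 in steps of 16.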
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 220c70a48542..4181775fc6da 100644 --- a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -1,4 +1,4 @@ -//===-- PPCAsmParser.cpp - Parse PowerPC asm to MCInst instructions ---------===// +//===-- PPCAsmParser.cpp - Parse PowerPC asm to MCInst instructions -------===// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCMCExpr.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPCTargetStreamer.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" @@ -22,11 +20,11 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -294,7 +292,7 @@ public: const MCInstrInfo &MII, const MCTargetOptions &Options) : MCTargetAsmParser(Options, STI), MII(MII) { // Check for 64-bit vs. 32-bit pointer mode. - Triple TheTriple(STI.getTargetTriple()); + const Triple &TheTriple = STI.getTargetTriple(); IsPPC64 = (TheTriple.getArch() == Triple::ppc64 || TheTriple.getArch() == Triple::ppc64le); IsDarwin = TheTriple.isMacOSX(); @@ -378,6 +376,10 @@ public: } } + // Disable use of sized deallocation due to overallocation of PPCOperand + // objects in CreateTokenWithStringCopy. + void operator delete(void *p) { ::operator delete(p); } + /// getStartLoc - Get the location of the first token of this operand. 
SMLoc getStartLoc() const override { return StartLoc; } @@ -392,13 +394,15 @@ public: return Imm.Val; } int64_t getImmS16Context() const { - assert((Kind == Immediate || Kind == ContextImmediate) && "Invalid access!"); + assert((Kind == Immediate || Kind == ContextImmediate) && + "Invalid access!"); if (Kind == Immediate) return Imm.Val; return static_cast<int16_t>(Imm.Val); } int64_t getImmU16Context() const { - assert((Kind == Immediate || Kind == ContextImmediate) && "Invalid access!"); + assert((Kind == Immediate || Kind == ContextImmediate) && + "Invalid access!"); return Imm.Val; } @@ -443,7 +447,9 @@ public: } bool isToken() const override { return Kind == Token; } - bool isImm() const override { return Kind == Immediate || Kind == Expression; } + bool isImm() const override { + return Kind == Immediate || Kind == Expression; + } bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); } bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); } bool isU3Imm() const { return Kind == Immediate && isUInt<3>(getImm()); } @@ -454,13 +460,15 @@ public: bool isU6ImmX2() const { return Kind == Immediate && isUInt<6>(getImm()) && (getImm() & 1) == 0; } + bool isU7Imm() const { return Kind == Immediate && isUInt<7>(getImm()); } bool isU7ImmX4() const { return Kind == Immediate && isUInt<7>(getImm()) && (getImm() & 3) == 0; } + bool isU8Imm() const { return Kind == Immediate && isUInt<8>(getImm()); } bool isU8ImmX8() const { return Kind == Immediate && isUInt<8>(getImm()) && (getImm() & 7) == 0; } - + bool isU10Imm() const { return Kind == Immediate && isUInt<10>(getImm()); } bool isU12Imm() const { return Kind == Immediate && isUInt<12>(getImm()); } bool isU16Imm() const { @@ -488,6 +496,9 @@ public: bool isS16ImmX4() const { return Kind == Expression || (Kind == Immediate && isInt<16>(getImm()) && (getImm() & 3) == 0); } + bool isS16ImmX16() const { return Kind == Expression || + (Kind == Immediate && isInt<16>(getImm()) && + (getImm() & 15) == 0); } bool isS17Imm() const { switch (Kind) { case Expression: @@ -521,7 +532,9 @@ public: (Kind == Immediate && isInt<16>(getImm()) && (getImm() & 3) == 0); } bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); } - bool isVSRegNumber() const { return Kind == Immediate && isUInt<6>(getImm()); } + bool isVSRegNumber() const { + return Kind == Immediate && isUInt<6>(getImm()); + } bool isCCRegNumber() const { return (Kind == Expression && isUInt<3>(getExprCRVal())) || (Kind == Immediate @@ -1190,6 +1203,29 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, } break; } + case PPC::CP_COPYx: + case PPC::CP_COPY_FIRST: { + MCInst TmpInst; + TmpInst.setOpcode(PPC::CP_COPY); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::createImm(Opcode == PPC::CP_COPYx ? 0 : 1)); + + Inst = TmpInst; + break; + } + case PPC::CP_PASTEx : + case PPC::CP_PASTE_LAST: { + MCInst TmpInst; + TmpInst.setOpcode(Opcode == PPC::CP_PASTEx ? + PPC::CP_PASTE : PPC::CP_PASTEo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::createImm(Opcode == PPC::CP_PASTEx ? 0 : 1)); + + Inst = TmpInst; + break; + } } } @@ -1454,8 +1490,8 @@ ParseExpression(const MCExpr *&EVal) { /// This differs from the default "parseExpression" in that it handles detection /// of the \code hi16(), ha16() and lo16() \endcode modifiers. 
At present, /// parseExpression() doesn't recognise the modifiers when in the Darwin/MachO -/// syntax form so it is done here. TODO: Determine if there is merit in arranging -/// for this to be done at a higher level. +/// syntax form so it is done here. TODO: Determine if there is merit in +/// arranging for this to be done at a higher level. bool PPCAsmParser:: ParseDarwinExpression(const MCExpr *&EVal) { MCAsmParser &Parser = getParser(); @@ -1674,7 +1710,7 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, while (getLexer().isNot(AsmToken::EndOfStatement) && getLexer().is(AsmToken::Comma)) { // Consume the comma token - getLexer().Lex(); + Lex(); // Parse the next operand if (ParseOperand(Operands)) diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 1fc84fb76551..6ea4fb1bfbc3 100644 --- a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "PPC.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -368,6 +368,21 @@ static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm, return MCDisassembler::Success; } +static DecodeStatus decodeMemRIX16Operands(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + // Decode the memrix16 field (imm, reg), which has the low 12-bits as the + // displacement with 16-byte aligned, and the next 5 bits as the register #. + + uint64_t Base = Imm >> 12; + uint64_t Disp = Imm & 0xFFF; + + assert(Base < 32 && "Invalid base register"); + + Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 4))); + Inst.addOperand(MCOperand::createReg(GP0Regs[Base])); + return MCDisassembler::Success; +} + static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm, int64_t Address, const void *Decoder) { // The cr bit encoding is 0x80 >> cr_reg_num. diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 5e1d22789056..d9d9b4f180f7 100644 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -136,17 +136,6 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - // For fast-isel, a COPY_TO_REGCLASS may survive this long. This is - // used when converting a 32-bit float to a 64-bit float as part of - // conversion to an integer (see PPCFastISel.cpp:SelectFPToI()), - // as otherwise we have problems with incorrect register classes - // in machine instruction verification. For now, just avoid trying - // to print it as such an instruction has no effect (a 32-bit float - // in a register is already in 64-bit form, just with lower - // precision). FIXME: Is there a better solution? 
- if (MI->getOpcode() == TargetOpcode::COPY_TO_REGCLASS) - return; - if (!printAliasInstr(MI, O)) printInstruction(MI, O); printAnnotation(O, Annot); @@ -299,6 +288,20 @@ void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo, O << (unsigned int)Value; } +void PPCInstPrinter::printU7ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned int Value = MI->getOperand(OpNo).getImm(); + assert(Value <= 127 && "Invalid u7imm argument!"); + O << (unsigned int)Value; +} + +void PPCInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned int Value = MI->getOperand(OpNo).getImm(); + assert(Value <= 255 && "Invalid u8imm argument!"); + O << (unsigned int)Value; +} + void PPCInstPrinter::printU10ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned short Value = MI->getOperand(OpNo).getImm(); diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 53eb727d0b07..d0ffeff0247c 100644 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -53,6 +53,8 @@ public: void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU7ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU10ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU12ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index b6dd595ffb0e..9100ecb4aa37 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -168,8 +168,8 @@ public: llvm_unreachable("relaxInstruction() unimplemented"); } - - void relaxInstruction(const MCInst &Inst, MCInst &Res) const override { + void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + MCInst &Res) const override { // FIXME. 
llvm_unreachable("relaxInstruction() unimplemented"); } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index dd994956870f..fd279c60f3f5 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -25,8 +25,8 @@ namespace { PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI); protected: - unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel) const override; + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsPCRel) const override; bool needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const override; @@ -66,7 +66,7 @@ static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target, llvm_unreachable("unknown PPCMCExpr kind"); } -unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, +unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Target, Fixup); @@ -186,7 +186,7 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_TOC_HA: Type = ELF::R_PPC64_TOC16_HA; break; - case MCSymbolRefExpr::VK_PPC_TPREL: + case MCSymbolRefExpr::VK_TPREL: Type = ELF::R_PPC_TPREL16; break; case MCSymbolRefExpr::VK_PPC_TPREL_LO: @@ -210,7 +210,7 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_TPREL_HIGHESTA: Type = ELF::R_PPC64_TPREL16_HIGHESTA; break; - case MCSymbolRefExpr::VK_PPC_DTPREL: + case MCSymbolRefExpr::VK_DTPREL: Type = ELF::R_PPC64_DTPREL16; break; case MCSymbolRefExpr::VK_PPC_DTPREL_LO: @@ -319,13 +319,13 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_TOC_LO: Type = ELF::R_PPC64_TOC16_LO_DS; break; - case MCSymbolRefExpr::VK_PPC_TPREL: + case MCSymbolRefExpr::VK_TPREL: Type = ELF::R_PPC64_TPREL16_DS; break; case MCSymbolRefExpr::VK_PPC_TPREL_LO: Type = ELF::R_PPC64_TPREL16_LO_DS; break; - case MCSymbolRefExpr::VK_PPC_DTPREL: + case MCSymbolRefExpr::VK_DTPREL: Type = ELF::R_PPC64_DTPREL16_DS; break; case MCSymbolRefExpr::VK_PPC_DTPREL_LO: @@ -380,10 +380,10 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_DTPMOD: Type = ELF::R_PPC64_DTPMOD64; break; - case MCSymbolRefExpr::VK_PPC_TPREL: + case MCSymbolRefExpr::VK_TPREL: Type = ELF::R_PPC64_TPREL64; break; - case MCSymbolRefExpr::VK_PPC_DTPREL: + case MCSymbolRefExpr::VK_DTPREL: Type = ELF::R_PPC64_DTPREL64; break; } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index b7291561c75d..e7b2d8369f2f 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -69,6 +69,9 @@ public: unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; + unsigned getMemRIX16Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; @@ -102,19 +105,16 @@ public: void encodeInstruction(const MCInst &MI, raw_ostream &OS, 
SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override { - // For fast-isel, a float COPY_TO_REGCLASS can survive this long. - // It's just a nop to keep the register classes happy, so don't - // generate anything. unsigned Opcode = MI.getOpcode(); const MCInstrDesc &Desc = MCII.get(Opcode); - if (Opcode == TargetOpcode::COPY_TO_REGCLASS) - return; uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); // Output the constant in big/little endian byte order. unsigned Size = Desc.getSize(); switch (Size) { + case 0: + break; case 4: if (IsLittleEndian) { support::endian::Writer<support::little>(OS).write<uint32_t>(Bits); @@ -249,6 +249,19 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, return RegBits; } +unsigned PPCMCCodeEmitter::getMemRIX16Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + // Encode (imm, reg) as a memrix16, which has the low 12-bits as the + // displacement and the next 5 bits as the register #. + assert(MI.getOperand(OpNo+1).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 12; + + const MCOperand &MO = MI.getOperand(OpNo); + assert(MO.isImm()); + + return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits; +} unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 30f232a9a91e..c9074448fe45 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -15,7 +15,6 @@ #include "InstPrinter/PPCInstPrinter.h" #include "PPCMCAsmInfo.h" #include "PPCTargetStreamer.h" -#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" @@ -87,24 +86,13 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createPPCMCCodeGenInfo(const Triple &TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { - MCCodeGenInfo *X = new MCCodeGenInfo(); - - if (RM == Reloc::Default) { - if (TT.isOSDarwin()) - RM = Reloc::DynamicNoPIC; - else - RM = Reloc::Static; - } +static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM, + CodeModel::Model &CM) { if (CM == CodeModel::Default) { if (!TT.isOSDarwin() && (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le)) CM = CodeModel::Medium; } - X->initMCCodeGenInfo(RM, CM, OL); - return X; } namespace { @@ -245,7 +233,7 @@ extern "C" void LLVMInitializePowerPCTargetMC() { RegisterMCAsmInfoFn C(*T, createPPCMCAsmInfo); // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(*T, createPPCMCCodeGenInfo); + TargetRegistry::registerMCAdjustCodeGenOpts(*T, adjustCodeGenOpts); // Register the MC instruction info. 
TargetRegistry::RegisterMCInstrInfo(*T, createPPCMCInstrInfo); diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index b54a0e1b86b1..1f38a8c947e7 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -79,7 +79,7 @@ static unsigned getFixupKindLog2Size(unsigned Kind) { } /// Translates generic PPC fixup kind to Mach-O/PPC relocation type enum. -/// Outline based on PPCELFObjectWriter::GetRelocType(). +/// Outline based on PPCELFObjectWriter::getRelocType(). static unsigned getRelocType(const MCValue &Target, const MCFixupKind FixupKind, // from // Fixup.getKind() diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h index a259ed3fd327..e01f49dce81e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.h +++ b/contrib/llvm/lib/Target/PowerPC/PPC.h @@ -16,7 +16,6 @@ #define LLVM_LIB_TARGET_POWERPC_PPC_H #include "MCTargetDesc/PPCMCTargetDesc.h" -#include <string> // GCC #defines PPC on Linux but we use it as our namespace name #undef PPC @@ -34,7 +33,6 @@ namespace llvm { #ifndef NDEBUG FunctionPass *createPPCCTRLoopsVerify(); #endif - FunctionPass *createPPCLoopDataPrefetchPass(); FunctionPass *createPPCLoopPreIncPrepPass(PPCTargetMachine &TM); FunctionPass *createPPCTOCRegDepsPass(); FunctionPass *createPPCEarlyReturnPass(); @@ -43,6 +41,7 @@ namespace llvm { FunctionPass *createPPCVSXSwapRemovalPass(); FunctionPass *createPPCMIPeepholePass(); FunctionPass *createPPCBranchSelectionPass(); + FunctionPass *createPPCQPXLoadSplatPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCTLSDynamicCallPass(); FunctionPass *createPPCBoolRetToIntPass(); @@ -60,13 +59,12 @@ namespace llvm { //===------------------------------------------------------------------===// // PPC Specific MachineOperand flags. MO_NO_FLAG, - - /// MO_PLT_OR_STUB - On a symbol operand "FOO", this indicates that the - /// reference is actually to the "FOO$stub" or "FOO@plt" symbol. This is - /// used for calls and jumps to external functions on Tiger and earlier, and + + /// On a symbol operand "FOO", this indicates that the reference is actually + /// to "FOO@plt". This is used for calls and jumps to external functions on /// for PIC calls on Linux and ELF systems. - MO_PLT_OR_STUB = 1, - + MO_PLT = 1, + /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to /// the function's picbase, e.g. lo16(symbol-picbase). MO_PIC_FLAG = 2, @@ -74,7 +72,7 @@ namespace llvm { /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase). MO_NLP_FLAG = 4, - + /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a /// symbol with hidden visibility. This causes a different kind of /// non-lazy-pointer to be generated. @@ -93,11 +91,11 @@ namespace llvm { /// These values identify relocations on immediates folded /// into memory operations. 
MO_DTPREL_LO = 5 << 4, - MO_TLSLD_LO = 6 << 4, - MO_TOC_LO = 7 << 4, + MO_TLSLD_LO = 6 << 4, + MO_TOC_LO = 7 << 4, // Symbol for VK_PPC_TLS fixup attached to an ADD instruction - MO_TLS = 8 << 4 + MO_TLS = 8 << 4 }; } // end namespace PPCII diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td index b03be12cfd97..b40b530f4c5d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.td +++ b/contrib/llvm/lib/Target/PowerPC/PPC.td @@ -37,16 +37,19 @@ def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">; def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">; def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_E500mc", "">; -def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective", +def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_E5500", "">; def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">; def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">; def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">; -def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">; +def DirectivePwr5x + : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">; def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; -def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">; +def DirectivePwr6x + : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">; def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">; +def DirectivePwr9: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR9", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; @@ -86,8 +89,6 @@ def FeatureFPCVT : SubtargetFeature<"fpcvt", "HasFPCVT", "true", "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions">; def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", "Enable the isel instruction">; -def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true", - "Enable the popcnt[dw] instructions">; def FeatureBPERMD : SubtargetFeature<"bpermd", "HasBPERMD", "true", "Enable the bpermd instruction">; def FeatureExtDiv : SubtargetFeature<"extdiv", "HasExtDiv", "true", @@ -145,24 +146,43 @@ def FeatureFloat128 : SubtargetFeature<"float128", "HasFloat128", "true", "Enable the __float128 data type for IEEE-754R Binary128.", [FeatureVSX]>; +def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", + "POPCNTD_Fast", + "Enable the popcnt[dw] instructions">; +// Note that for the a2/a2q processor models we should not use popcnt[dw] by +// default. These processors do support the instructions, but they're +// microcoded, and the software emulation is about twice as fast. +def FeatureSlowPOPCNTD : SubtargetFeature<"slow-popcntd","HasPOPCNTD", + "POPCNTD_Slow", + "Has slow popcnt[dw] instructions">; def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", "Treat vector data stream cache control instructions as deprecated">; -/* Since new processors generally contain a superset of features of those that - came before them, the idea is to make implementations of new processors - less error prone and easier to read. - Namely: - list<SubtargetFeature> Power8FeatureList = ... 
- list<SubtargetFeature> FutureProcessorSpecificFeatureList = - [ features that Power8 does not support ] - list<SubtargetFeature> FutureProcessorFeatureList = - !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList) +def FeatureISA3_0 : SubtargetFeature<"isa-v30-instructions", "IsISA3_0", + "true", + "Enable instructions added in ISA 3.0.">; +def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec", "true", + "Enable POWER9 Altivec instructions", + [FeatureISA3_0, FeatureP8Altivec]>; +def FeatureP9Vector : SubtargetFeature<"power9-vector", "HasP9Vector", "true", + "Enable POWER9 vector instructions", + [FeatureISA3_0, FeatureP8Vector, + FeatureP9Altivec]>; - Makes it explicit and obvious what is new in FutureProcesor vs. Power8 as - well as providing a single point of definition if the feature set will be - used elsewhere. -*/ +// Since new processors generally contain a superset of features of those that +// came before them, the idea is to make implementations of new processors +// less error prone and easier to read. +// Namely: +// list<SubtargetFeature> Power8FeatureList = ... +// list<SubtargetFeature> FutureProcessorSpecificFeatureList = +// [ features that Power8 does not support ] +// list<SubtargetFeature> FutureProcessorFeatureList = +// !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList) + +// Makes it explicit and obvious what is new in FutureProcesor vs. Power8 as +// well as providing a single point of definition if the feature set will be +// used elsewhere. def ProcessorFeatures { list<SubtargetFeature> Power7FeatureList = [DirectivePwr7, FeatureAltivec, FeatureVSX, @@ -180,6 +200,10 @@ def ProcessorFeatures { FeatureFusion]; list<SubtargetFeature> Power8FeatureList = !listconcat(Power7FeatureList, Power8SpecificFeatures); + list<SubtargetFeature> Power9SpecificFeatures = + [FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0]; + list<SubtargetFeature> Power9FeatureList = + !listconcat(Power8FeatureList, Power9SpecificFeatures); } // Note: Future features to add when support is extended to more @@ -331,16 +355,17 @@ def : ProcessorModel<"a2", PPCA2Model, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit - /*, Feature64BitRegs */, FeatureMFTB]>; + FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>; def : ProcessorModel<"a2q", PPCA2Model, [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit - /*, Feature64BitRegs */, FeatureQPX, FeatureMFTB]>; + FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */, FeatureQPX, + FeatureMFTB]>; def : ProcessorModel<"pwr3", G5Model, [DirectivePwr3, FeatureAltivec, FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF, @@ -377,6 +402,8 @@ def : ProcessorModel<"pwr6x", G5Model, FeatureMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>; +// FIXME: Same as P8 until the POWER9 scheduling info is available +def : ProcessorModel<"pwr9", P8Model, ProcessorFeatures.Power9FeatureList>; def : Processor<"ppc", G3Itineraries, [Directive32, FeatureMFTB]>; def : 
ProcessorModel<"ppc64", G5Model, [Directive64, FeatureAltivec, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index ec354c209ca0..76c52ab6cf1e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -25,7 +25,6 @@ #include "PPCTargetMachine.h" #include "PPCTargetStreamer.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -50,7 +49,6 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -82,6 +80,12 @@ public: MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym); + virtual bool doInitialization(Module &M) override { + if (!TOC.empty()) + TOC.clear(); + return AsmPrinter::doInitialization(M); + } + void EmitInstruction(const MachineInstr *MI) override; void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); @@ -138,8 +142,6 @@ public: bool doFinalization(Module &M) override; void EmitStartOfAsmFile(Module &M) override; - - void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs); }; } // end of anonymous namespace @@ -195,29 +197,14 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, MCSymbol *SymToPrint; // External or weakly linked global variables need non-lazily-resolved stubs - if (TM.getRelocationModel() != Reloc::Static && - !GV->isStrongDefinitionForLinker()) { - if (!GV->hasHiddenVisibility()) { - SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry( - SymToPrint); - if (!StubSym.getPointer()) - StubSym = MachineModuleInfoImpl:: - StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); - } else if (GV->isDeclaration() || GV->hasCommonLinkage() || - GV->hasAvailableExternallyLinkage()) { - SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - - MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry( - SymToPrint); - if (!StubSym.getPointer()) - StubSym = MachineModuleInfoImpl:: - StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); - } else { - SymToPrint = getSymbol(GV); - } + if (Subtarget->hasLazyResolverStub(GV)) { + SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + MachineModuleInfoImpl::StubValueTy &StubSym = + MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry( + SymToPrint); + if (!StubSym.getPointer()) + StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV), + !GV->hasInternalLinkage()); } else { SymToPrint = getSymbol(GV); } @@ -470,7 +457,7 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, "GETtls[ld]ADDR[32] must read GPR3"); if (!Subtarget->isPPC64() && !Subtarget->isDarwin() && - TM.getRelocationModel() == Reloc::PIC_) + isPositionIndependent()) Kind = MCSymbolRefExpr::VK_PLT; const MCSymbolRefExpr *TlsRef = MCSymbolRefExpr::create(TlsGetAddr, Kind, OutContext); @@ -597,7 +584,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { else if (MO.isBlockAddress()) MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); - if (PL == PICLevel::Small) { + if (PL == PICLevel::SmallPIC) { const MCExpr *Exp = 
MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_GOT, OutContext); @@ -1038,10 +1025,10 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) { } if (static_cast<const PPCTargetMachine &>(TM).isPPC64() || - TM.getRelocationModel() != Reloc::PIC_) + !isPositionIndependent()) return AsmPrinter::EmitStartOfAsmFile(M); - if (M.getPICLevel() == PICLevel::Small) + if (M.getPICLevel() == PICLevel::SmallPIC) return AsmPrinter::EmitStartOfAsmFile(M); OutStreamer->SwitchSection(OutContext.getELFSection( @@ -1067,8 +1054,8 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) { void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { // linux/ppc32 - Normal entry label. if (!Subtarget->isPPC64() && - (TM.getRelocationModel() != Reloc::PIC_ || - MF->getFunction()->getParent()->getPICLevel() == PICLevel::Small)) + (!isPositionIndependent() || + MF->getFunction()->getParent()->getPICLevel() == PICLevel::SmallPIC)) return AsmPrinter::EmitFunctionEntryLabel(); if (!Subtarget->isPPC64()) { @@ -1302,8 +1289,10 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { "power6", "power6x", "power7", + // FIXME: why is power8 missing here? "ppc64", - "ppc64le" + "ppc64le", + "power9" }; // Get the numerically largest directive. @@ -1350,161 +1339,6 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); } -static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) { - // Remove $stub suffix, add $lazy_ptr. - StringRef NoStub = Sym->getName().substr(0, Sym->getName().size()-5); - return Ctx.getOrCreateSymbol(NoStub + "$lazy_ptr"); -} - -static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) { - // Add $tmp suffix to $stub, yielding $stub$tmp. - return Ctx.getOrCreateSymbol(Sym->getName() + "$tmp"); -} - -void PPCDarwinAsmPrinter:: -EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { - bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64; - - // Construct a local MCSubtargetInfo and shadow EmitToStreamer here. - // This is because the MachineFunction won't exist (but have not yet been - // freed) and since we're at the global level we can use the default - // constructed subtarget. 
- std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( - TM.getTargetTriple().str(), TM.getTargetCPU(), - TM.getTargetFeatureString())); - auto EmitToStreamer = [&STI] (MCStreamer &S, const MCInst &Inst) { - S.EmitInstruction(Inst, *STI); - }; - - const TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); - - // .lazy_symbol_pointer - MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection(); - - // Output stubs for dynamically-linked functions - if (TM.getRelocationModel() == Reloc::PIC_) { - MCSection *StubSection = OutContext.getMachOSection( - "__TEXT", "__picsymbolstub1", - MachO::S_SYMBOL_STUBS | MachO::S_ATTR_PURE_INSTRUCTIONS, 32, - SectionKind::getText()); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - OutStreamer->SwitchSection(StubSection); - EmitAlignment(4); - - MCSymbol *Stub = Stubs[i].first; - MCSymbol *RawSym = Stubs[i].second.getPointer(); - MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext); - MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext); - - OutStreamer->EmitLabel(Stub); - OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); - - const MCExpr *Anon = MCSymbolRefExpr::create(AnonSymbol, OutContext); - const MCExpr *LazyPtrExpr = MCSymbolRefExpr::create(LazyPtr, OutContext); - const MCExpr *Sub = - MCBinaryExpr::createSub(LazyPtrExpr, Anon, OutContext); - - // mflr r0 - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R0)); - // bcl 20, 31, AnonSymbol - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCLalways).addExpr(Anon)); - OutStreamer->EmitLabel(AnonSymbol); - // mflr r11 - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R11)); - // addis r11, r11, ha16(LazyPtr - AnonSymbol) - const MCExpr *SubHa16 = PPCMCExpr::createHa(Sub, true, OutContext); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS) - .addReg(PPC::R11) - .addReg(PPC::R11) - .addExpr(SubHa16)); - // mtlr r0 - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR).addReg(PPC::R0)); - - // ldu r12, lo16(LazyPtr - AnonSymbol)(r11) - // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11) - const MCExpr *SubLo16 = PPCMCExpr::createLo(Sub, true, OutContext); - EmitToStreamer(*OutStreamer, MCInstBuilder(isPPC64 ? 
PPC::LDU : PPC::LWZU) - .addReg(PPC::R12) - .addExpr(SubLo16).addExpr(SubLo16) - .addReg(PPC::R11)); - // mtctr r12 - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12)); - // bctr - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCTR)); - - OutStreamer->SwitchSection(LSPSection); - OutStreamer->EmitLabel(LazyPtr); - OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); - - MCSymbol *DyldStubBindingHelper = - OutContext.getOrCreateSymbol(StringRef("dyld_stub_binding_helper")); - if (isPPC64) { - // .quad dyld_stub_binding_helper - OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 8); - } else { - // .long dyld_stub_binding_helper - OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 4); - } - } - OutStreamer->AddBlankLine(); - return; - } - - MCSection *StubSection = OutContext.getMachOSection( - "__TEXT", "__symbol_stub1", - MachO::S_SYMBOL_STUBS | MachO::S_ATTR_PURE_INSTRUCTIONS, 16, - SectionKind::getText()); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - MCSymbol *Stub = Stubs[i].first; - MCSymbol *RawSym = Stubs[i].second.getPointer(); - MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext); - const MCExpr *LazyPtrExpr = MCSymbolRefExpr::create(LazyPtr, OutContext); - - OutStreamer->SwitchSection(StubSection); - EmitAlignment(4); - OutStreamer->EmitLabel(Stub); - OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); - - // lis r11, ha16(LazyPtr) - const MCExpr *LazyPtrHa16 = - PPCMCExpr::createHa(LazyPtrExpr, true, OutContext); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LIS) - .addReg(PPC::R11) - .addExpr(LazyPtrHa16)); - - // ldu r12, lo16(LazyPtr)(r11) - // lwzu r12, lo16(LazyPtr)(r11) - const MCExpr *LazyPtrLo16 = - PPCMCExpr::createLo(LazyPtrExpr, true, OutContext); - EmitToStreamer(*OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) - .addReg(PPC::R12) - .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16) - .addReg(PPC::R11)); - - // mtctr r12 - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12)); - // bctr - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCTR)); - - OutStreamer->SwitchSection(LSPSection); - OutStreamer->EmitLabel(LazyPtr); - OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); - - MCSymbol *DyldStubBindingHelper = - OutContext.getOrCreateSymbol(StringRef("dyld_stub_binding_helper")); - if (isPPC64) { - // .quad dyld_stub_binding_helper - OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 8); - } else { - // .long dyld_stub_binding_helper - OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 4); - } - } - - OutStreamer->AddBlankLine(); -} - bool PPCDarwinAsmPrinter::doFinalization(Module &M) { bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64; @@ -1514,10 +1348,6 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { MachineModuleInfoMachO &MMIMacho = MMI->getObjFileInfo<MachineModuleInfoMachO>(); - MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList(); - if (!Stubs.empty()) - EmitFunctionStubs(Stubs); - if (MAI->doesSupportExceptionHandling() && MMI) { // Add the (possibly multiple) personalities to the set of global values. // Only referenced functions get into the Personalities list. @@ -1534,7 +1364,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { } // Output stubs for dynamically-linked functions. - Stubs = MMIMacho.GetGVStubList(); + MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetGVStubList(); // Output macho stubs for external and common global variables. 
if (!Stubs.empty()) { @@ -1568,25 +1398,6 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { OutStreamer->AddBlankLine(); } - Stubs = MMIMacho.GetHiddenGVStubList(); - if (!Stubs.empty()) { - OutStreamer->SwitchSection(getObjFileLowering().getDataSection()); - EmitAlignment(isPPC64 ? 3 : 2); - - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - // L_foo$stub: - OutStreamer->EmitLabel(Stubs[i].first); - // .long _foo - OutStreamer->EmitValue(MCSymbolRefExpr:: - create(Stubs[i].second.getPointer(), - OutContext), - isPPC64 ? 8 : 4/*size*/); - } - - Stubs.clear(); - OutStreamer->AddBlankLine(); - } - // Funny Darwin hack: This flag tells the linker that no global symbols // contain code that falls through to other global symbols (e.g. the obvious // implementation of multiple entry points). If this doesn't occur, the diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp index 7920240bc2b9..bfb4d8756901 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp @@ -119,7 +119,7 @@ class PPCBoolRetToInt : public FunctionPass { Promotable.insert(P); SmallVector<const PHINode *, 8> ToRemove; - for (const auto &P : Promotable) { + for (const PHINode *P : Promotable) { // Condition 2 and 3 auto IsValidUser = [] (const Value *V) -> bool { return isa<ReturnInst>(V) || isa<CallInst>(V) || isa<PHINode>(V) || @@ -146,7 +146,7 @@ class PPCBoolRetToInt : public FunctionPass { Promotable.erase(User); ToRemove.clear(); - for (const auto &P : Promotable) { + for (const PHINode *P : Promotable) { // Condition 4 and 5 const auto &Users = P->users(); const auto &Operands = P->operands(); @@ -168,6 +168,9 @@ class PPCBoolRetToInt : public FunctionPass { } bool runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + PHINodeSet PromotablePHINodes = getPromotablePHINodes(F); B2IMap Bool2IntMap; bool Changed = false; @@ -199,11 +202,11 @@ class PPCBoolRetToInt : public FunctionPass { // Presently, we only know how to handle PHINode, Constant, and Arguments. // Potentially, bitwise operations (AND, OR, XOR, NOT) and sign extension // could also be handled in the future. 
- for (const auto &V : Defs) + for (Value *V : Defs) if (!isa<PHINode>(V) && !isa<Constant>(V) && !isa<Argument>(V)) return false; - for (const auto &V : Defs) + for (Value *V : Defs) if (const PHINode *P = dyn_cast<PHINode>(V)) if (!PromotablePHINodes.count(P)) return false; @@ -214,7 +217,7 @@ class PPCBoolRetToInt : public FunctionPass { ++NumBoolCallPromotion; ++NumBoolToIntPromotion; - for (const auto &V : Defs) + for (Value *V : Defs) if (!BoolToIntMap.count(V)) BoolToIntMap[V] = translate(V); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp index 73a5305197ad..4d63c5b5703c 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -46,6 +46,11 @@ namespace { bool runOnMachineFunction(MachineFunction &Fn) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } + const char *getPassName() const override { return "PowerPC Branch Selector"; } @@ -102,10 +107,9 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { } unsigned BlockSize = 0; - for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end(); - MBBI != EE; ++MBBI) - BlockSize += TII->GetInstSizeInBytes(MBBI); - + for (MachineInstr &MI : *MBB) + BlockSize += TII->GetInstSizeInBytes(MI); + BlockSizes[MBB->getNumber()] = BlockSize; FuncSize += BlockSize; } @@ -151,7 +155,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { Dest = I->getOperand(0).getMBB(); if (!Dest) { - MBBStartOffset += TII->GetInstSizeInBytes(I); + MBBStartOffset += TII->GetInstSizeInBytes(*I); continue; } @@ -234,4 +238,3 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { BlockSizes.clear(); return true; } - diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCCState.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCCState.cpp new file mode 100644 index 000000000000..5510a95430f5 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCCCState.cpp @@ -0,0 +1,36 @@ +//===---- PPCCCState.cpp - CCState with PowerPC specific extensions ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PPCCCState.h" +#include "PPCSubtarget.h" +#include "llvm/IR/Module.h" +using namespace llvm; + +// Identify lowered values that originated from ppcf128 arguments and record +// this. +void PPCCCState::PreAnalyzeCallOperands( + const SmallVectorImpl<ISD::OutputArg> &Outs) { + for (const auto &I : Outs) { + if (I.ArgVT == llvm::MVT::ppcf128) + OriginalArgWasPPCF128.push_back(true); + else + OriginalArgWasPPCF128.push_back(false); + } +} + +void PPCCCState::PreAnalyzeFormalArguments( + const SmallVectorImpl<ISD::InputArg> &Ins) { + for (const auto &I : Ins) { + if (I.ArgVT == llvm::MVT::ppcf128) { + OriginalArgWasPPCF128.push_back(true); + } else { + OriginalArgWasPPCF128.push_back(false); + } + } +}
\ No newline at end of file diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCCState.h b/contrib/llvm/lib/Target/PowerPC/PPCCCState.h new file mode 100644 index 000000000000..9be9f11dbea3 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCCCState.h @@ -0,0 +1,42 @@ +//===---- PPCCCState.h - CCState with PowerPC specific extensions -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef PPCCCSTATE_H +#define PPCCCSTATE_H + +#include "PPCISelLowering.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/CallingConvLower.h" + +namespace llvm { + +class PPCCCState : public CCState { +public: + + void + PreAnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs); + void + PreAnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins); + +private: + + // Records whether the value has been lowered from an ppcf128. + SmallVector<bool, 4> OriginalArgWasPPCF128; + +public: + PPCCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, + SmallVectorImpl<CCValAssign> &locs, LLVMContext &C) + : CCState(CC, isVarArg, MF, locs, C) {} + + bool WasOriginalArgPPCF128(unsigned ValNo) { return OriginalArgWasPPCF128[ValNo]; } + void clearWasPPCF128() { OriginalArgWasPPCF128.clear(); } +}; +} + +#endif diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp index b6ac4d54d4c7..875226635917 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -54,9 +54,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #endif -#include <algorithm> -#include <vector> - using namespace llvm; #define DEBUG_TYPE "ctrloops" @@ -169,6 +166,9 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() { #endif // NDEBUG bool PPCCTRLoops::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); @@ -245,7 +245,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { if (Function *F = CI->getCalledFunction()) { // Most intrinsics don't become function calls, but some might. // sin, cos, exp and log are always calls. 
- unsigned Opcode; + unsigned Opcode = 0; if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { switch (F->getIntrinsicID()) { default: continue; @@ -305,6 +305,8 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; case Intrinsic::round: Opcode = ISD::FROUND; break; + case Intrinsic::minnum: Opcode = ISD::FMINNUM; break; + case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break; } } @@ -364,8 +366,18 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { case LibFunc::truncf: case LibFunc::truncl: Opcode = ISD::FTRUNC; break; + case LibFunc::fmin: + case LibFunc::fminf: + case LibFunc::fminl: + Opcode = ISD::FMINNUM; break; + case LibFunc::fmax: + case LibFunc::fmaxf: + case LibFunc::fmaxl: + Opcode = ISD::FMAXNUM; break; } + } + if (Opcode) { auto &DL = CI->getModule()->getDataLayout(); MVT VTy = TLI->getSimpleValueType(DL, CI->getArgOperand(0)->getType(), true); @@ -422,6 +434,25 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) return true; } + + if (TM->getSubtargetImpl(*BB->getParent())->getTargetLowering()->useSoftFloat()) { + switch(J->getOpcode()) { + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FCmp: + return true; + } + } + for (Value *Operand : J->operands()) if (memAddrUsesCTR(TM, Operand)) return true; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td index 5bc9124f8085..53d2f77ff918 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -23,6 +23,9 @@ class CCIfNotSubtarget<string F, CCAction A> "(State.getMachineFunction().getSubtarget()).", F), A>; +class CCIfOrigArgWasNotPPCF128<CCAction A> + : CCIf<"!static_cast<PPCCCState *>(&State)->WasOriginalArgPPCF128(ValNo)", + A>; //===----------------------------------------------------------------------===// // Return Value Calling Convention @@ -109,7 +112,7 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[ CCIfType<[i8], CCPromoteToType<i64>>, CCIfType<[i16], CCPromoteToType<i64>>, CCIfType<[i32], CCPromoteToType<i64>>, - CCIfType<[i64], CCAssignToReg<[X3, X4]>>, + CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, @@ -131,7 +134,14 @@ def CC_PPC32_SVR4_Common : CallingConv<[ // The ABI requires i64 to be passed in two adjacent registers with the first // register having an odd register number. - CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>, + CCIfType<[i32], + CCIfSplit<CCIfSubtarget<"useSoftFloat()", + CCIfOrigArgWasNotPPCF128< + CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>>>, + + CCIfType<[i32], + CCIfSplit<CCIfNotSubtarget<"useSoftFloat()", + CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>>, // The 'nest' parameter, if any, is passed in R11. 
CCIfNest<CCAssignToReg<[R11]>>, @@ -243,12 +253,23 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, F27, F28, F29, F30, F31, CR2, CR3, CR4 )>; +// CSRs that are handled by prologue, epilogue. +def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>; + +def CSR_SVR464_ViaCopy : CalleeSavedRegs<(add CSR_SVR464)>; + def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>; +def CSR_SVR464_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_Altivec)>; + def CSR_SVR464_R2 : CalleeSavedRegs<(add CSR_SVR464, X2)>; +def CSR_SVR464_R2_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2)>; + def CSR_SVR464_R2_Altivec : CalleeSavedRegs<(add CSR_SVR464_Altivec, X2)>; +def CSR_SVR464_R2_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2_Altivec)>; + def CSR_NoRegs : CalleeSavedRegs<(add)>; def CSR_64_AllRegs: CalleeSavedRegs<(add X0, (sequence "X%u", 3, 10), diff --git a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp index 7cb1bb54c725..fcd2f50e1e3d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "PPCInstrInfo.h" -#include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" @@ -26,7 +26,6 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" @@ -84,7 +83,7 @@ protected: // This is an unconditional branch to the return. Replace the // branch with a blr. BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode())) - .copyImplicitOps(I); + .copyImplicitOps(*I); MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; @@ -98,7 +97,7 @@ protected: BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR)) .addImm(J->getOperand(0).getImm()) .addReg(J->getOperand(1).getReg()) - .copyImplicitOps(I); + .copyImplicitOps(*I); MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; @@ -113,7 +112,7 @@ protected: **PI, J, J->getDebugLoc(), TII->get(J->getOpcode() == PPC::BC ? 
PPC::BCLR : PPC::BCLRn)) .addReg(J->getOperand(0).getReg()) - .copyImplicitOps(I); + .copyImplicitOps(*I); MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; @@ -174,6 +173,9 @@ protected: public: bool runOnMachineFunction(MachineFunction &MF) override { + if (skipFunction(*MF.getFunction())) + return false; + TII = MF.getSubtarget().getInstrInfo(); bool Changed = false; @@ -192,6 +194,11 @@ public: return Changed; } + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); } @@ -204,4 +211,3 @@ INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE, char PPCEarlyReturn::ID = 0; FunctionPass* llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); } - diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp index 16dcd468c91d..7e92042d2f96 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -16,6 +16,7 @@ #include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPCCallingConv.h" +#include "PPCCCState.h" #include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" @@ -158,7 +159,7 @@ class PPCFastISel final : public FastISel { unsigned FP64LoadOpc = PPC::LFD); bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr); bool PPCComputeAddress(const Value *Obj, Address &Addr); - void PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset, + void PPCSimplifyAddress(Address &Addr, bool &UseOffset, unsigned &IndexReg); bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg, bool IsZExt); @@ -185,7 +186,7 @@ class PPCFastISel final : public FastISel { unsigned &NumBytes, bool IsVarArg); bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes); - CCAssignFn *usePPC32CCs(unsigned Flag); + LLVM_ATTRIBUTE_UNUSED CCAssignFn *usePPC32CCs(unsigned Flag); private: #include "PPCGenFastISel.inc" @@ -196,7 +197,7 @@ class PPCFastISel final : public FastISel { #include "PPCGenCallingConv.inc" -// Function whose sole purpose is to kill compiler warnings +// Function whose sole purpose is to kill compiler warnings // stemming from unused functions included from PPCGenCallingConv.inc. CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) { if (Flag == 1) @@ -213,13 +214,29 @@ static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) { switch (Pred) { // These are not representable with any single compare. case CmpInst::FCMP_FALSE: + case CmpInst::FCMP_TRUE: + // Major concern about the following 6 cases is NaN result. The comparison + // result consists of 4 bits, indicating lt, eq, gt and un (unordered), + // only one of which will be set. The result is generated by fcmpu + // instruction. However, bc instruction only inspects one of the first 3 + // bits, so when un is set, bc instruction may jump to to an undesired + // place. + // + // More specifically, if we expect an unordered comparison and un is set, we + // expect to always go to true branch; in such case UEQ, UGT and ULT still + // give false, which are undesired; but UNE, UGE, ULE happen to give true, + // since they are tested by inspecting !eq, !lt, !gt, respectively. + // + // Similarly, for ordered comparison, when un is set, we always expect the + // result to be false. 
In such case OGT, OLT and OEQ are good, since they are + actually testing GT, LT, and EQ respectively, which are false. OGE, OLE + and ONE are tested through !lt, !gt and !eq, and these are true. case CmpInst::FCMP_UEQ: case CmpInst::FCMP_UGT: - case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULT: - case CmpInst::FCMP_ULE: - case CmpInst::FCMP_UNE: - case CmpInst::FCMP_TRUE: + case CmpInst::FCMP_OGE: + case CmpInst::FCMP_OLE: + case CmpInst::FCMP_ONE: default: return Optional<PPC::Predicate>(); @@ -232,7 +249,7 @@ static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) { case CmpInst::ICMP_SGT: return PPC::PRED_GT; - case CmpInst::FCMP_OGE: + case CmpInst::FCMP_UGE: case CmpInst::ICMP_UGE: case CmpInst::ICMP_SGE: return PPC::PRED_GE; @@ -242,12 +259,12 @@ static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) { case CmpInst::ICMP_SLT: return PPC::PRED_LT; - case CmpInst::FCMP_OLE: + case CmpInst::FCMP_ULE: case CmpInst::ICMP_ULE: case CmpInst::ICMP_SLE: return PPC::PRED_LE; - case CmpInst::FCMP_ONE: + case CmpInst::FCMP_UNE: case CmpInst::ICMP_NE: return PPC::PRED_NE; @@ -412,7 +429,7 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) { // Fix up some addresses that can't be used directly. For example, if // an offset won't fit in an instruction field, we may need to move it // into an index register. -void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset, +void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset, unsigned &IndexReg) { // Check whether the offset fits in the instruction field. @@ -431,8 +448,7 @@ void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset, } if (!UseOffset) { - IntegerType *OffsetTy = ((VT == MVT::i32) ? Type::getInt32Ty(*Context) - : Type::getInt64Ty(*Context)); + IntegerType *OffsetTy = Type::getInt64Ty(*Context); const ConstantInt *Offset = ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset)); IndexReg = PPCMaterializeInt(Offset, MVT::i64); @@ -501,7 +517,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // If necessary, materialize the offset into a register and use // the indexed form. Also handle stack pointers with special needs. unsigned IndexReg = 0; - PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg); + PPCSimplifyAddress(Addr, UseOffset, IndexReg); // If this is a potential VSX load with an offset of 0, a VSX indexed load can // be used. @@ -637,7 +653,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { // If necessary, materialize the offset into a register and use // the indexed form. Also handle stack pointers with special needs. unsigned IndexReg = 0; - PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg); + PPCSimplifyAddress(Addr, UseOffset, IndexReg); // If this is a potential VSX store with an offset of 0, a VSX indexed store // can be used. @@ -1068,10 +1084,10 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, if (!PPCEmitStore(MVT::f64, SrcReg, Addr)) return 0; - // Reload it into a GPR. If we want an i32, modify the address - // to have a 4-byte offset so we load from the right place. + // Reload it into a GPR. If we want an i32 on big endian, modify the + // address to have a 4-byte offset so we load from the right place. if (VT == MVT::i32) - Addr.Offset = 4; + Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4; // Look at the currently assigned register for this instruction // to determine the required register class.
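The predicate remapping above is easier to sanity-check with a toy model of the fcmpu result field. The following is a minimal illustrative sketch, not LLVM code; CRField and the fcmpu helper are names invented here:

#include <cassert>
#include <cmath>

// One CR field as fcmpu produces it: exactly one of lt/gt/eq/un is set.
struct CRField { bool lt, gt, eq, un; };

static CRField fcmpu(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) return {false, false, false, true};
  if (a < b) return {true, false, false, false};
  if (a > b) return {false, true, false, false};
  return {false, false, true, false};
}

int main() {
  CRField cr = fcmpu(std::nan(""), 1.0); // unordered: only un is set
  // bc can test only one of lt/gt/eq. OGT -> test gt: false when
  // unordered, which is exactly what an ordered predicate wants.
  assert(!cr.gt);
  // UGE -> test !lt (PRED_GE): true when unordered, which is exactly
  // what an unordered predicate wants. Hence FCMP_UGE -> PPC::PRED_GE.
  assert(!cr.lt);
  // UEQ has no single-bit test: eq is false but !lt and !gt are both
  // true, so no one bit, in either polarity, means "unordered or equal".
  return 0;
}

Under this model, every ordered predicate kept in the table reads a bit that is clear on unordered inputs, and every unordered predicate reads the complement of one, which is why only the six mixed cases have to be rejected.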
@@ -1115,14 +1131,13 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { return false; // Convert f32 to f64 if necessary. This is just a meaningless copy - // to get the register class right. COPY_TO_REGCLASS is needed since - // a COPY from F4RC to F8RC is converted to a F4RC-F4RC copy downstream. + // to get the register class right. const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg); if (InRC == &PPC::F4RCRegClass) { unsigned TmpReg = createResultReg(&PPC::F8RCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg) - .addReg(SrcReg).addImm(PPC::F8RCRegClassID); + TII.get(TargetOpcode::COPY), TmpReg) + .addReg(SrcReg); SrcReg = TmpReg; } @@ -1583,6 +1598,9 @@ bool PPCFastISel::SelectRet(const Instruction *I) { if (!FuncInfo.CanLowerReturn) return false; + if (TLI.supportSplitCSR(FuncInfo.MF)) + return false; + const ReturnInst *Ret = cast<ReturnInst>(I); const Function &F = *I->getParent()->getParent(); @@ -2071,7 +2089,6 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, return TmpReg3; } - // Materialize an integer constant into a register, and return // the register number (or zero if we failed to handle it). unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT, @@ -2085,12 +2102,12 @@ unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT, return ImmReg; } - if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && - VT != MVT::i8 && VT != MVT::i1) + if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && + VT != MVT::i1) return 0; - const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass : - &PPC::GPRCRegClass); + const TargetRegisterClass *RC = + ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue(); // If the constant is in range, use a load-immediate. diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 3fd509ae27f4..c480430dd29a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -76,9 +76,7 @@ static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { // SVR4 ABI: First slot in the general register save area. return STI.isPPC64() ? -16U - : (STI.getTargetMachine().getRelocationModel() == Reloc::PIC_) - ? -12U - : -8U; + : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U; } PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) @@ -596,7 +594,7 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, (!UseAtEnd && (&MBB->getParent()->front() == MBB))) return true; - RS.enterBasicBlock(MBB); + RS.enterBasicBlock(*MBB); if (UseAtEnd && !MBB->empty()) { // The scratch register will be used at the end of the block, so must @@ -653,7 +651,7 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, // Now that we've done our best to provide both registers, double check // whether we were unable to provide enough. - if (BV.count() < (TwoUniqueRegsRequired ? 2 : 1)) + if (BV.count() < (TwoUniqueRegsRequired ? 
2U : 1U)) return false; return true; @@ -838,13 +836,20 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, // If we need to spill the CR and the LR but we don't have two separate // registers available, we must spill them one at a time if (MustSaveCR && SingleScratchReg && MustSaveLR) { - // FIXME: In the ELFv2 ABI, we are not required to save all CR fields. - // If only one or two CR fields are clobbered, it could be more - // efficient to use mfocrf to selectively save just those fields. + // In the ELFv2 ABI, we are not required to save all CR fields. + // If only one or two CR fields are clobbered, it is more efficient to use + // mfocrf to selectively save just those fields, because mfocrf has shorter + // latency compared to mfcr. + unsigned MfcrOpcode = PPC::MFCR8; + unsigned CrState = RegState::ImplicitKill; + if (isELFv2ABI && MustSaveCRs.size() == 1) { + MfcrOpcode = PPC::MFOCRF8; + CrState = RegState::Kill; + } MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg); + BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) - MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill); + MIB.addReg(MustSaveCRs[i], CrState); BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) .addReg(TempReg, getKillRegState(true)) .addImm(8) @@ -856,13 +861,20 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64 - // FIXME: In the ELFv2 ABI, we are not required to save all CR fields. - // If only one or two CR fields are clobbered, it could be more - // efficient to use mfocrf to selectively save just those fields. + // In the ELFv2 ABI, we are not required to save all CR fields. + // If only one or two CR fields are clobbered, it is more efficient to use + // mfocrf to selectively save just those fields, because mfocrf has shorter + // latency compared to mfcr. + unsigned MfcrOpcode = PPC::MFCR8; + unsigned CrState = RegState::ImplicitKill; + if (isELFv2ABI && MustSaveCRs.size() == 1) { + MfcrOpcode = PPC::MFOCRF8; + CrState = RegState::Kill; + } MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg); + BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) - MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill); + MIB.addReg(MustSaveCRs[i], CrState); } if (HasFP) @@ -889,7 +901,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, if (MustSaveLR) // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. BuildMI(MBB, MBBI, dl, StoreInst) - .addReg(ScratchReg) + .addReg(ScratchReg, getKillRegState(true)) .addImm(LROffset) .addReg(SPReg); @@ -1315,36 +1327,53 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addReg(FPReg) .addReg(ScratchReg); } - } else if (RetOpcode == PPC::TCRETURNdi) { - MBBI = MBB.getLastNonDebugInstr(); - MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); - } else if (RetOpcode == PPC::TCRETURNri) { - MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); - } else if (RetOpcode == PPC::TCRETURNai) { - MBBI = MBB.getLastNonDebugInstr(); - MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); - } else if (RetOpcode == PPC::TCRETURNdi8) { - MBBI = MBB.getLastNonDebugInstr(); - MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). - addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); - } else if (RetOpcode == PPC::TCRETURNri8) { - MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); - } else if (RetOpcode == PPC::TCRETURNai8) { - MBBI = MBB.getLastNonDebugInstr(); - MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); + } else { + createTailCallBranchInstr(MBB); } } } +void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + DebugLoc dl; + + if (MBBI != MBB.end()) + dl = MBBI->getDebugLoc(); + + const PPCInstrInfo &TII = + *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo()); + + // Create a branch instruction for the pseudo tail call return instruction + unsigned RetOpcode = MBBI->getOpcode(); + if (RetOpcode == PPC::TCRETURNdi) { + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); + } else if (RetOpcode == PPC::TCRETURNri) { + MBBI = MBB.getLastNonDebugInstr(); + assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); + } else if (RetOpcode == PPC::TCRETURNai) { + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); + } else if (RetOpcode == PPC::TCRETURNdi8) { + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); + } else if (RetOpcode == PPC::TCRETURNri8) { + MBBI = MBB.getLastNonDebugInstr(); + assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); + } else if (RetOpcode == PPC::TCRETURNai8) { + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); + } +} + void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { @@ -1421,6 +1450,18 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, MachineFrameInfo *FFI = MF.getFrameInfo(); const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo(); + // If the function is shrink-wrapped, and if the function has a tail call, the + tail call might not be in the new RestoreBlock, so the real branch instruction + won't be generated by emitEpilogue(), because shrink-wrap has chosen a new + RestoreBlock.
So we handle this case here. + if (FFI->getSavePoint() && FFI->hasTailCall()) { + MachineBasicBlock *RestoreBlock = FFI->getRestorePoint(); + for (MachineBasicBlock &MBB : MF) { + if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) + createTailCallBranchInstr(MBB); + } + } + // Early exit if no callee saved registers are modified! if (CSI.empty() && !needsFP(MF)) { addScavengingSpillSlot(MF, RS); @@ -1770,7 +1811,7 @@ restoreCRs(bool isPPC64, bool is31, .addReg(MoveReg, getKillRegState(true))); } -void PPCFrameLowering:: +MachineBasicBlock::iterator PPCFrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); @@ -1787,7 +1828,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; MachineInstr *MI = I; - DebugLoc dl = MI->getDebugLoc(); + const DebugLoc &dl = MI->getDebugLoc(); if (isInt<16>(CalleeAmt)) { BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) @@ -1807,7 +1848,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, } } // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. - MBB.erase(I); + return MBB.erase(I); } bool diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h index f1f3f0b831a7..28b0c57f0ffb 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h @@ -66,6 +66,13 @@ class PPCFrameLowering: public TargetFrameLowering { unsigned *SR2 = nullptr) const; bool twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const; + /** + * \brief Create branch instruction for PPC::TCRETURN* (tail call return) + * + * \param[in] MBB that is terminated by PPC::TCRETURN* + */ + void createTailCallBranchInstr(MachineBasicBlock &MBB) const; + public: PPCFrameLowering(const PPCSubtarget &STI); @@ -93,9 +100,9 @@ public: const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const override; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const override; + MachineBasicBlock::iterator + eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 7234e30fa73e..caab67d68b17 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -162,8 +162,9 @@ unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) { unsigned Directive = DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective(); // If we're using a special group-terminating nop, then we need only one. 
+ // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || - Directive == PPC::DIR_PWR8 ) + Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9) return 1; return 5 - CurSlots; @@ -223,8 +224,10 @@ void PPCDispatchGroupSBHazardRecognizer::EmitNoop() { DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective(); // If the group has now filled all of its slots, or if we're using a special // group-terminating nop, the group is complete. + // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || - Directive == PPC::DIR_PWR8 || CurSlots == 6) { + Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9 || + CurSlots == 6) { CurGroup.clear(); CurSlots = CurBranches = 0; } else { diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 1eaa8118ba0a..0e9b2daa0cb5 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -59,10 +59,6 @@ static cl::opt<bool> EnableBranchHint( cl::desc("Enable static hinting of branches on ppc"), cl::Hidden); -namespace llvm { - void initializePPCDAGToDAGISelPass(PassRegistry&); -} - namespace { //===--------------------------------------------------------------------===// /// PPCDAGToDAGISel - PPC specific code to select PPC machine @@ -75,9 +71,7 @@ namespace { unsigned GlobalBaseReg; public: explicit PPCDAGToDAGISel(PPCTargetMachine &tm) - : SelectionDAGISel(tm), TM(tm) { - initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry()); - } + : SelectionDAGISel(tm), TM(tm) {} bool runOnMachineFunction(MachineFunction &MF) override { // Make sure we re-emit a set of the global base reg if necessary @@ -97,18 +91,18 @@ namespace { /// getI32Imm - Return a target constant with the specified value, of type /// i32. - inline SDValue getI32Imm(unsigned Imm, SDLoc dl) { + inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); } /// getI64Imm - Return a target constant with the specified value, of type /// i64. - inline SDValue getI64Imm(uint64_t Imm, SDLoc dl) { + inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) { return CurDAG->getTargetConstant(Imm, dl, MVT::i64); } /// getSmallIPtrImm - Return a target constant of pointer type. - inline SDValue getSmallIPtrImm(unsigned Imm, SDLoc dl) { + inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) { return CurDAG->getTargetConstant( Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout())); } @@ -122,18 +116,19 @@ namespace { /// base register. Return the virtual register that holds this value. SDNode *getGlobalBaseReg(); - SDNode *getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0); + void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0); // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. - SDNode *Select(SDNode *N) override; + void Select(SDNode *N) override; - SDNode *SelectBitfieldInsert(SDNode *N); - SDNode *SelectBitPermutation(SDNode *N); + bool tryBitfieldInsert(SDNode *N); + bool tryBitPermutation(SDNode *N); /// SelectCC - Select a comparison of the specified values with the /// specified condition code, returning the CR# of the expression.
- SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDLoc dl); + SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, + const SDLoc &dl); /// SelectAddrImm - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement [r+imm]. @@ -228,7 +223,7 @@ namespace { #include "PPCGenDAGISel.inc" private: - SDNode *SelectSETCC(SDNode *N); + bool trySETCC(SDNode *N); void PeepholePPC64(); void PeepholePPC64ZExt(); @@ -240,7 +235,7 @@ private: bool AllUsersSelectZero(SDNode *N); void SwapAllSelectUsers(SDNode *N); - SDNode *transferMemOperands(SDNode *N, SDNode *Result); + void transferMemOperands(SDNode *N, SDNode *Result); }; } @@ -324,7 +319,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) { if (PPCSubTarget->isTargetELF()) { GlobalBaseReg = PPC::R30; - if (M->getPICLevel() == PICLevel::Small) { + if (M->getPICLevel() == PICLevel::SmallPIC) { BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true); @@ -458,16 +453,17 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { && isInt32Immediate(N->getOperand(1).getNode(), Imm); } -SDNode *PPCDAGToDAGISel::getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { +void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { SDLoc dl(SN); int FI = cast<FrameIndexSDNode>(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8; if (SN->hasOneUse()) - return CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI, - getSmallIPtrImm(Offset, dl)); - return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI, - getSmallIPtrImm(Offset, dl)); + CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI, + getSmallIPtrImm(Offset, dl)); + else + ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI, + getSmallIPtrImm(Offset, dl))); } bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, @@ -512,9 +508,9 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, return false; } -/// SelectBitfieldInsert - turn an or of two masked values into -/// the rotate left word immediate then mask insert (rlwimi) instruction. -SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { +/// Turn an or of two masked values into the rotate left word immediate then +/// mask insert (rlwimi) instruction. +bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDLoc dl(N); @@ -584,15 +580,16 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { SH &= 31; SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; - return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops); + ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); + return true; } } - return nullptr; + return false; } // Predict the number of instructions that would be generated by calling -// SelectInt64(N). -static unsigned SelectInt64CountDirect(int64_t Imm) { +// getInt64(N). +static unsigned getInt64CountDirect(int64_t Imm) { // Assume no remaining bits. unsigned Remainder = 0; // Assume no shift required. 
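The renamed getInt64Count/getInt64 pair stays in sync around one recipe: build the upper 32 bits with lis/ori, shift them up with rldicr, then patch the low halfwords with oris/ori. Below is a rough, illustrative cost model of just the direct path; it deliberately ignores the trailing-zero shift and the rotate search the real code performs, and directCost is a name invented here:

#include <cstdint>
#include <cstdio>

// Approximate instruction count for materializing a 64-bit immediate
// directly: li/lis/ori for the high word, rldicr to shift it up, then
// oris/ori for the two low halfwords. A sketch only, not the LLVM code.
static unsigned directCost(int64_t Imm) {
  if (Imm >= -32768 && Imm <= 32767) return 1;          // li
  if (Imm >= INT32_MIN && Imm <= INT32_MAX)
    return (Imm & 0xFFFF) ? 2u : 1u;                    // lis [+ ori]
  unsigned Count = directCost(Imm >> 32) + 1;           // high word + rldicr
  if ((Imm >> 16) & 0xFFFF) ++Count;                    // oris
  if (Imm & 0xFFFF) ++Count;                            // ori
  return Count;
}

int main() {
  // lis, ori, rldicr, oris, ori:
  printf("%u\n", directCost(INT64_C(0x12345678DEADBEEF))); // prints 5
}

The rotate search in getInt64Count then tries every rotation of the immediate, hoping a rotated form costs fewer instructions even after paying one rotate to undo it.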
@@ -653,17 +650,17 @@ static uint64_t Rot64(uint64_t Imm, unsigned R) { return (Imm << R) | (Imm >> (64 - R)); } -static unsigned SelectInt64Count(int64_t Imm) { - unsigned Count = SelectInt64CountDirect(Imm); +static unsigned getInt64Count(int64_t Imm) { + unsigned Count = getInt64CountDirect(Imm); if (Count == 1) return Count; for (unsigned r = 1; r < 63; ++r) { uint64_t RImm = Rot64(Imm, r); - unsigned RCount = SelectInt64CountDirect(RImm) + 1; + unsigned RCount = getInt64CountDirect(RImm) + 1; Count = std::min(Count, RCount); - // See comments in SelectInt64 for an explanation of the logic below. + // See comments in getInt64 for an explanation of the logic below. unsigned LS = findLastSet(RImm); if (LS != r-1) continue; @@ -671,16 +668,17 @@ static unsigned SelectInt64Count(int64_t Imm) { uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); uint64_t RImmWithOnes = RImm | OnesMask; - RCount = SelectInt64CountDirect(RImmWithOnes) + 1; + RCount = getInt64CountDirect(RImmWithOnes) + 1; Count = std::min(Count, RCount); } return Count; } -// Select a 64-bit constant. For cost-modeling purposes, SelectInt64Count +// Select a 64-bit constant. For cost-modeling purposes, getInt64Count // (above) needs to be kept in sync with this function. -static SDNode *SelectInt64Direct(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) { +static SDNode *getInt64Direct(SelectionDAG *CurDAG, const SDLoc &dl, + int64_t Imm) { // Assume no remaining bits. unsigned Remainder = 0; // Assume no shift required. @@ -754,10 +752,10 @@ static SDNode *SelectInt64Direct(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) { return Result; } -static SDNode *SelectInt64(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) { - unsigned Count = SelectInt64CountDirect(Imm); +static SDNode *getInt64(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm) { + unsigned Count = getInt64CountDirect(Imm); if (Count == 1) - return SelectInt64Direct(CurDAG, dl, Imm); + return getInt64Direct(CurDAG, dl, Imm); unsigned RMin = 0; @@ -766,7 +764,7 @@ static SDNode *SelectInt64(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) { for (unsigned r = 1; r < 63; ++r) { uint64_t RImm = Rot64(Imm, r); - unsigned RCount = SelectInt64CountDirect(RImm) + 1; + unsigned RCount = getInt64CountDirect(RImm) + 1; if (RCount < Count) { Count = RCount; RMin = r; @@ -789,7 +787,7 @@ static SDNode *SelectInt64(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) { uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); uint64_t RImmWithOnes = RImm | OnesMask; - RCount = SelectInt64CountDirect(RImmWithOnes) + 1; + RCount = getInt64CountDirect(RImmWithOnes) + 1; if (RCount < Count) { Count = RCount; RMin = r; @@ -799,24 +797,24 @@ static SDNode *SelectInt64(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) { } if (!RMin) - return SelectInt64Direct(CurDAG, dl, Imm); + return getInt64Direct(CurDAG, dl, Imm); auto getI32Imm = [CurDAG, dl](unsigned Imm) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); }; - SDValue Val = SDValue(SelectInt64Direct(CurDAG, dl, MatImm), 0); + SDValue Val = SDValue(getInt64Direct(CurDAG, dl, MatImm), 0); return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val, getI32Imm(64 - RMin), getI32Imm(MaskEnd)); } // Select a 64-bit constant. -static SDNode *SelectInt64(SelectionDAG *CurDAG, SDNode *N) { +static SDNode *getInt64(SelectionDAG *CurDAG, SDNode *N) { SDLoc dl(N); // Get 64 bit value. 
int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue(); - return SelectInt64(CurDAG, dl, Imm); + return getInt64(CurDAG, dl, Imm); } namespace { @@ -1209,7 +1207,7 @@ class BitPermutationSelector { "bit group ends at index 63 but there is another?"); auto IN = BitGroups.begin(); - if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V && + if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V && (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt && IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP && IsAllLow32(*I)) { @@ -1252,7 +1250,7 @@ class BitPermutationSelector { } } - SDValue getI32Imm(unsigned Imm, SDLoc dl) { + SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); } @@ -1270,7 +1268,7 @@ class BitPermutationSelector { // Depending on the number of groups for a particular value, it might be // better to rotate, mask explicitly (using andi/andis), and then or the // result. Select this part of the result first. - void SelectAndParts32(SDLoc dl, SDValue &Res, unsigned *InstCnt) { + void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { if (BPermRewriterNoMasking) return; @@ -1466,8 +1464,8 @@ class BitPermutationSelector { // For 64-bit values, not all combinations of rotates and masks are // available. Produce one if it is available. - SDValue SelectRotMask64(SDValue V, SDLoc dl, unsigned RLAmt, bool Repl32, - unsigned MaskStart, unsigned MaskEnd, + SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt, + bool Repl32, unsigned MaskStart, unsigned MaskEnd, unsigned *InstCnt = nullptr) { // In the notation used by the instructions, 'start' and 'end' are reversed // because bits are counted from high to low order. @@ -1527,8 +1525,8 @@ class BitPermutationSelector { // For 64-bit values, not all combinations of rotates and masks are // available. Produce a rotate-mask-and-insert if one is available. - SDValue SelectRotMaskIns64(SDValue Base, SDValue V, SDLoc dl, unsigned RLAmt, - bool Repl32, unsigned MaskStart, + SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl, + unsigned RLAmt, bool Repl32, unsigned MaskStart, unsigned MaskEnd, unsigned *InstCnt = nullptr) { // In the notation used by the instructions, 'start' and 'end' are reversed // because bits are counted from high to low order. 
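On the "start"/"end" reversal these comments mention: PowerPC numbers bits from the most-significant end, so a mask from MB to ME selects high-order bits first. A small sketch of that convention, assuming MB <= ME (no wrap-around masks); rot64 and ibmMask are names invented here, not LLVM helpers:

#include <cassert>
#include <cstdint>

// Rotate left by R; the R == 0 guard avoids a shift by 64.
static uint64_t rot64(uint64_t V, unsigned R) {
  return R ? (V << R) | (V >> (64 - R)) : V;
}

// Mask keeping IBM bits MB..ME, where bit 0 is the MSB and bit 63 the LSB.
static uint64_t ibmMask(unsigned MB, unsigned ME) {
  uint64_t M = ~0ULL >> MB;          // clear bits above MB (MSB side)
  return M & (~0ULL << (63 - ME));   // clear bits below ME (LSB side)
}

int main() {
  // rldicl V, R, MB behaves like: rotate left by R, keep IBM bits MB..63.
  uint64_t V = 0xF00000000000000FULL;
  assert((rot64(V, 4) & ibmMask(48, 63)) == 0x00000000000000FFULL);
  return 0;
}

So a "mask start" of 48 actually means the mask begins 48 bits down from the top, which is why start and end look swapped next to the usual LSB-first reading.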
@@ -1574,7 +1572,7 @@ class BitPermutationSelector { return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd); } - void SelectAndParts64(SDLoc dl, SDValue &Res, unsigned *InstCnt) { + void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { if (BPermRewriterNoMasking) return; @@ -1646,7 +1644,7 @@ class BitPermutationSelector { NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + (unsigned) (ANDIMask != 0 && ANDISMask != 0); else - NumAndInsts += SelectInt64Count(Mask) + /* and */ 1; + NumAndInsts += getInt64Count(Mask) + /* and */ 1; unsigned NumRLInsts = 0; bool FirstBG = true; @@ -1709,7 +1707,7 @@ class BitPermutationSelector { TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, ANDIVal, ANDISVal), 0); } else { - TotalVal = SDValue(SelectInt64(CurDAG, dl, Mask), 0); + TotalVal = SDValue(getInt64(CurDAG, dl, Mask), 0); TotalVal = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, VRot, TotalVal), 0); @@ -1852,9 +1850,9 @@ class BitPermutationSelector { Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, ANDIVal, ANDISVal), 0); } else { - if (InstCnt) *InstCnt += SelectInt64Count(Mask) + /* and */ 1; + if (InstCnt) *InstCnt += getInt64Count(Mask) + /* and */ 1; - SDValue MaskVal = SDValue(SelectInt64(CurDAG, dl, Mask), 0); + SDValue MaskVal = SDValue(getInt64(CurDAG, dl, Mask), 0); Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, Res, MaskVal), 0); @@ -1955,13 +1953,13 @@ public: }; } // anonymous namespace -SDNode *PPCDAGToDAGISel::SelectBitPermutation(SDNode *N) { +bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) - return nullptr; + return false; if (!UseBitPermRewriter) - return nullptr; + return false; switch (N->getOpcode()) { default: break; @@ -1971,17 +1969,21 @@ SDNode *PPCDAGToDAGISel::SelectBitPermutation(SDNode *N) { case ISD::AND: case ISD::OR: { BitPermutationSelector BPS(CurDAG); - return BPS.Select(N); + if (SDNode *New = BPS.Select(N)) { + ReplaceNode(N, New); + return true; + } + return false; } } - return nullptr; + return false; } /// SelectCC - Select a comparison of the specified values with the specified /// condition code, returning the CR# of the expression. -SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, - ISD::CondCode CC, SDLoc dl) { +SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, + const SDLoc &dl) { // Always select the LHS. 
unsigned Opc; @@ -2255,7 +2257,7 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, } } -SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { +bool PPCDAGToDAGISel::trySETCC(SDNode *N) { SDLoc dl(N); unsigned Imm; ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); @@ -2276,20 +2278,22 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0); SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl), getI32Imm(31, dl) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + return true; } case ISD::SETNE: { if (isPPC64) break; SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(~0U, dl)), 0); - return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, - AD.getValue(1)); + CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1)); + return true; } case ISD::SETLT: { SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + return true; } case ISD::SETGT: { SDValue T = @@ -2297,7 +2301,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0); SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + return true; } } } else if (Imm == ~0U) { // setcc op, -1 @@ -2308,18 +2313,20 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { if (isPPC64) break; Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(1, dl)), 0); - return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, - SDValue(CurDAG->getMachineNode(PPC::LI, dl, - MVT::i32, - getI32Imm(0, dl)), - 0), Op.getValue(1)); + CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, + SDValue(CurDAG->getMachineNode(PPC::LI, dl, + MVT::i32, + getI32Imm(0, dl)), + 0), Op.getValue(1)); + return true; case ISD::SETNE: { if (isPPC64) break; Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0); SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(~0U, dl)); - return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), - Op, SDValue(AD, 1)); + CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op, + SDValue(AD, 1)); + return true; } case ISD::SETLT: { SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op, @@ -2328,14 +2335,15 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { Op), 0); SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + return true; } case ISD::SETGT: { SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); - return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, - getI32Imm(1, dl)); + CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl)); + return true; } } } @@ -2348,7 +2356,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { // vector compare operations return the same type as the operands. 
if (LHS.getValueType().isVector()) { if (PPCSubTarget->hasQPX()) - return nullptr; + return false; EVT VecVT = LHS.getValueType(); bool Swap, Negate; @@ -2360,16 +2368,17 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { EVT ResVT = VecVT.changeVectorElementTypeToInteger(); if (Negate) { SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0); - return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : - PPC::VNOR, - ResVT, VCmp, VCmp); + CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR, + ResVT, VCmp, VCmp); + return true; } - return CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS); + CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS); + return true; } if (PPCSubTarget->useCRBits()) - return nullptr; + return false; bool Inv; unsigned Idx = getCRIdxForSetCC(CC, Inv); @@ -2388,31 +2397,33 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; - if (!Inv) - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + if (!Inv) { + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + return true; + } // Get the specified bit. SDValue Tmp = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); - return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl)); + CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl)); + return true; } -SDNode *PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { +void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { // Transfer memoperands. MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); - return Result; } // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. -SDNode *PPCDAGToDAGISel::Select(SDNode *N) { +void PPCDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); if (N->isMachineOpcode()) { N->setNodeId(-1); - return nullptr; // Already selected. + return; // Already selected. } // In case any misguided DAG-level optimizations form an ADD with a @@ -2423,39 +2434,44 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { llvm_unreachable("Invalid ADD with TargetConstant operand"); // Try matching complex bit permutations before doing anything else. 
- if (SDNode *NN = SelectBitPermutation(N)) - return NN; + if (tryBitPermutation(N)) + return; switch (N->getOpcode()) { default: break; case ISD::Constant: { - if (N->getValueType(0) == MVT::i64) - return SelectInt64(CurDAG, N); + if (N->getValueType(0) == MVT::i64) { + ReplaceNode(N, getInt64(CurDAG, N)); + return; + } break; } case ISD::SETCC: { - SDNode *SN = SelectSETCC(N); - if (SN) - return SN; + if (trySETCC(N)) + return; break; } case PPCISD::GlobalBaseReg: - return getGlobalBaseReg(); + ReplaceNode(N, getGlobalBaseReg()); + return; case ISD::FrameIndex: - return getFrameIndex(N, N); + selectFrameIndex(N, N); + return; case PPCISD::MFOCRF: { SDValue InFlag = N->getOperand(1); - return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, - N->getOperand(0), InFlag); + ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, + N->getOperand(0), InFlag)); + return; } case PPCISD::READ_TIME_BASE: { - return CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32, - MVT::Other, N->getOperand(0)); + ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32, + MVT::Other, N->getOperand(0))); + return; } case PPCISD::SRA_ADDZE: { @@ -2468,16 +2484,18 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDNode *Op = CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue, N0, ShiftAmt); - return CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, - SDValue(Op, 0), SDValue(Op, 1)); + CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0), + SDValue(Op, 1)); + return; } else { assert(N->getValueType(0) == MVT::i32 && "Expecting i64 or i32 in PPCISD::SRA_ADDZE"); SDNode *Op = CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue, N0, ShiftAmt); - return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, - SDValue(Op, 0), SDValue(Op, 1)); + CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0), + SDValue(Op, 1)); + return; } } @@ -2524,11 +2542,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Offset, Base, Chain }; - return transferMemOperands( - N, CurDAG->getMachineNode( - Opcode, dl, LD->getValueType(0), - PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, - Ops)); + SDNode *MN = CurDAG->getMachineNode( + Opcode, dl, LD->getValueType(0), + PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); + transferMemOperands(N, MN); + ReplaceNode(N, MN); + return; } else { unsigned Opcode; bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; @@ -2563,11 +2582,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Base, Offset, Chain }; - return transferMemOperands( - N, CurDAG->getMachineNode( - Opcode, dl, LD->getValueType(0), - PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, - Ops)); + SDNode *MN = CurDAG->getMachineNode( + Opcode, dl, LD->getValueType(0), + PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); + transferMemOperands(N, MN); + ReplaceNode(N, MN); + return; } } @@ -2582,7 +2602,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Val = N->getOperand(0).getOperand(0); SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + return; } // If this is just a masked value where the input is not handled above, and // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm @@ 
-2592,7 +2613,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Val = N->getOperand(0); SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + return; } // If this is a 64-bit zero-extension mask, emit rldicl. if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && @@ -2614,12 +2636,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { } SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; - return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); + CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); + return; } // AND X, 0 -> 0, not "rlwinm 32". if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) { ReplaceUses(SDValue(N, 0), N->getOperand(1)); - return nullptr; + return; } // ISD::OR doesn't get all the bitfield insertion fun. // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a @@ -2645,7 +2668,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { N->getOperand(0).getOperand(1), getI32Imm(0, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; - return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops); + ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); + return; } } @@ -2654,8 +2678,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { } case ISD::OR: { if (N->getValueType(0) == MVT::i32) - if (SDNode *I = SelectBitfieldInsert(N)) - return I; + if (tryBitfieldInsert(N)) + return; short Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && @@ -2665,8 +2689,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { // If this is equivalent to an add, then we can fold it with the // FrameIndex calculation. - if ((LHSKnownZero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) - return getFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); + if ((LHSKnownZero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { + selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); + return; + } } // Other cases are autogenerated. @@ -2675,8 +2701,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { case ISD::ADD: { short Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && - isIntS16Immediate(N->getOperand(1), Imm)) - return getFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); + isIntS16Immediate(N->getOperand(1), Imm)) { + selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); + return; + } break; } @@ -2687,7 +2715,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0).getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + return; } // Other cases are autogenerated. @@ -2700,7 +2729,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0).getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + return; } // Other cases are autogenerated. @@ -2726,9 +2756,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ? 
PPC::sub_eq : PPC::sub_gt, dl, MVT::i32); - return CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, - CR0Reg, SRIdxVal, - SDValue(AndI.getNode(), 1) /* glue */); + CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg, + SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */); + return; } case ISD::SELECT_CC: { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); @@ -2753,9 +2783,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, N->getOperand(0), getI32Imm(~0U, dl)); - return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, - SDValue(Tmp, 0), N->getOperand(0), - SDValue(Tmp, 1)); + CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0), + N->getOperand(0), SDValue(Tmp, 1)); + return; } SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl); @@ -2786,7 +2816,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, NotC, N->getOperand(3)), 0); - return CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF); + CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF); + return; } unsigned BROpc = getPredicateForSetCC(CC); @@ -2820,12 +2851,14 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3), getI32Imm(BROpc, dl) }; - return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops); + CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops); + return; } case ISD::VSELECT: if (PPCSubTarget->hasVSX()) { SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) }; - return CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops); + CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops); + return; } break; @@ -2856,8 +2889,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { SDValue Chain = LD->getChain(); SDValue Ops[] = { Base, Offset, Chain }; - return CurDAG->SelectNodeTo(N, PPC::LXVDSX, - N->getValueType(0), Ops); + CurDAG->SelectNodeTo(N, PPC::LXVDSX, N->getValueType(0), Ops); + return; } } @@ -2873,7 +2906,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl, MVT::i32); SDValue Ops[] = { Op1, Op2, DMV }; - return CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops); + CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops); + return; } break; @@ -2881,10 +2915,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { case PPCISD::BDZ: { bool IsPPC64 = PPCSubTarget->isPPC64(); SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; - return CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ? - (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : - (IsPPC64 ? PPC::BDZ8 : PPC::BDZ), - MVT::Other, Ops); + CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ + ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) + : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ), + MVT::Other, Ops); + return; } case PPCISD::COND_BRANCH: { // Op #0 is the Chain. 
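The i1 SELECT_CC expansion above (the CAndT and NotCAndF values) builds select(c, t, f) out of CR logic as (c AND t) OR (NOT c AND f), with CRNOR c, c supplying the complement. A plain-bool sketch of that identity, illustrative only and not LLVM code:

#include <cassert>

static bool crnor(bool a, bool b) { return !(a || b); }

static bool sel(bool c, bool t, bool f) {
  bool notC = crnor(c, c);         // CRNOR c, c  ==  ~c
  return (c && t) || (notC && f);  // CRAND, CRAND, CROR
}

int main() {
  // Exhaustively check the identity against the ternary operator.
  for (int c = 0; c < 2; ++c)
    for (int t = 0; t < 2; ++t)
      for (int f = 0; f < 2; ++f)
        assert(sel(c, t, f) == (c ? bool(t) : bool(f)));
  return 0;
}

Three CR-logic instructions plus the complement thus replace a conditional branch entirely, which is the point of doing i1 selects in condition-register arithmetic.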
@@ -2900,7 +2935,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getI32Imm(PCC, dl); SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3), N->getOperand(0), N->getOperand(4) }; - return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); + CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); + return; } case ISD::BR_CC: { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); @@ -2922,8 +2958,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1, N->getOperand(Swap ? 3 : 2), N->getOperand(Swap ? 2 : 3)), 0); - return CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, - BitComp, N->getOperand(4), N->getOperand(0)); + CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4), + N->getOperand(0)); + return; } if (EnableBranchHint) @@ -2932,7 +2969,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); SDValue Ops[] = { getI32Imm(PCC, dl), CondCode, N->getOperand(4), N->getOperand(0) }; - return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); + CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); + return; } case ISD::BRIND: { // FIXME: Should custom lower this. @@ -2942,15 +2980,19 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8; Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target, Chain), 0); - return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); + CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); + return; } case PPCISD::TOC_ENTRY: { assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) && "Only supported for 64-bit ABI and 32-bit SVR4"); if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) { SDValue GA = N->getOperand(0); - return transferMemOperands(N, CurDAG->getMachineNode(PPC::LWZtoc, dl, - MVT::i32, GA, N->getOperand(1))); + SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, + N->getOperand(1)); + transferMemOperands(N, MN); + ReplaceNode(N, MN); + return; } // For medium and large code model, we generate two instructions as @@ -2971,29 +3013,38 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { TOCbase, GA); if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) || - CModel == CodeModel::Large) - return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl, - MVT::i64, GA, SDValue(Tmp, 0))); + CModel == CodeModel::Large) { + SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, + SDValue(Tmp, 0)); + transferMemOperands(N, MN); + ReplaceNode(N, MN); + return; + } if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { const GlobalValue *GV = G->getGlobal(); unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV); if (GVFlags & PPCII::MO_NLP_FLAG) { - return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl, - MVT::i64, GA, SDValue(Tmp, 0))); + SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, + SDValue(Tmp, 0)); + transferMemOperands(N, MN); + ReplaceNode(N, MN); + return; } } - return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, - SDValue(Tmp, 0), GA); + ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, + SDValue(Tmp, 0), GA)); + return; } case PPCISD::PPC32_PICGOT: { // Generate a PIC-safe GOT reference. 
assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() && "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); - return CurDAG->SelectNodeTo( - N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(CurDAG->getDataLayout()), - MVT::i32); + CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, + PPCLowering->getPointerTy(CurDAG->getDataLayout()), + MVT::i32); + return; } case PPCISD::VADD_SPLAT: { // This expands into one of three sequences, depending on whether @@ -3035,7 +3086,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue EltVal = getI32Imm(Elt >> 1, dl); SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); SDValue TmpVal = SDValue(Tmp, 0); - return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal); + ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal)); + return; } else if (Elt > 0) { // Elt is odd and positive, in the range [17,31]. @@ -3048,8 +3100,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); EltVal = getI32Imm(-16, dl); SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); - return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), - SDValue(Tmp2, 0)); + ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), + SDValue(Tmp2, 0))); + return; } else { // Elt is odd and negative, in the range [-31,-17]. @@ -3062,13 +3115,14 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); EltVal = getI32Imm(-16, dl); SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); - return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), - SDValue(Tmp2, 0)); + ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), + SDValue(Tmp2, 0))); + return; } } } - return SelectCode(N); + SelectCode(N); } // If the target supports the cmpb instruction, do the idiom recognition here. @@ -3565,11 +3619,12 @@ void PPCDAGToDAGISel::PeepholeCROps() { MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); - else if (AllUsersSelectZero(MachineNode)) + else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), - MachineNode->getOperand(1)), + MachineNode->getOperand(1)); SelectSwap = true; + } break; case PPC::CRNAND: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) @@ -3603,11 +3658,12 @@ void PPCDAGToDAGISel::PeepholeCROps() { MVT::i1, MachineNode->getOperand(1). getOperand(0), MachineNode->getOperand(0)); - else if (AllUsersSelectZero(MachineNode)) + else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), - MachineNode->getOperand(1)), + MachineNode->getOperand(1)); SelectSwap = true; + } break; case PPC::CROR: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) @@ -3635,11 +3691,12 @@ void PPCDAGToDAGISel::PeepholeCROps() { MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); - else if (AllUsersSelectZero(MachineNode)) + else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), - MachineNode->getOperand(1)), + MachineNode->getOperand(1)); SelectSwap = true; + } break; case PPC::CRXOR: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) @@ -3674,11 +3731,12 @@ void PPCDAGToDAGISel::PeepholeCROps() { MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). 
getOperand(0)); - else if (AllUsersSelectZero(MachineNode)) + else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), - MachineNode->getOperand(1)), + MachineNode->getOperand(1)); SelectSwap = true; + } break; case PPC::CRNOR: if (Op1Set || Op2Set) @@ -3707,11 +3765,12 @@ void PPCDAGToDAGISel::PeepholeCROps() { MVT::i1, MachineNode->getOperand(1). getOperand(0), MachineNode->getOperand(0)); - else if (AllUsersSelectZero(MachineNode)) + else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), - MachineNode->getOperand(1)), + MachineNode->getOperand(1)); SelectSwap = true; + } break; case PPC::CREQV: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) @@ -3746,11 +3805,12 @@ void PPCDAGToDAGISel::PeepholeCROps() { MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); - else if (AllUsersSelectZero(MachineNode)) + else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), - MachineNode->getOperand(1)), + MachineNode->getOperand(1)); SelectSwap = true; + } break; case PPC::CRANDC: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) @@ -3781,11 +3841,12 @@ void PPCDAGToDAGISel::PeepholeCROps() { MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); - else if (AllUsersSelectZero(MachineNode)) + else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), - MachineNode->getOperand(0)), + MachineNode->getOperand(0)); SelectSwap = true; + } break; case PPC::CRORC: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) @@ -3816,11 +3877,12 @@ void PPCDAGToDAGISel::PeepholeCROps() { MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). 
getOperand(0)); - else if (AllUsersSelectZero(MachineNode)) + else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), - MachineNode->getOperand(0)), + MachineNode->getOperand(0)); SelectSwap = true; + } break; case PPC::SELECT_I4: case PPC::SELECT_I8: @@ -4365,15 +4427,3 @@ void PPCDAGToDAGISel::PeepholePPC64() { FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) { return new PPCDAGToDAGISel(TM); } - -static void initializePassOnce(PassRegistry &Registry) { - const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection"; - PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, - nullptr, false, false); - Registry.registerPass(*PI, true); -} - -void llvm::initializePPCDAGToDAGISelPass(PassRegistry &Registry) { - CALL_ONCE_INITIALIZATION(initializePassOnce); -} - diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index af9ad077a7ce..6e3c830a8243 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -14,11 +14,13 @@ #include "PPCISelLowering.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPCCallingConv.h" +#include "PPCCCState.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" #include "PPCTargetObjectFile.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -36,12 +38,16 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" +#include <list> using namespace llvm; +#define DEBUG_TYPE "ppc-lowering" + static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); @@ -51,6 +57,12 @@ cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hi static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); +static cl::opt<bool> DisableSCO("disable-ppc-sco", +cl::desc("disable sibling call optimization on ppc"), cl::Hidden); + +STATISTIC(NumTailCalls, "Number of tail calls"); +STATISTIC(NumSiblingCalls, "Number of sibling calls"); + // FIXME: Remove this once the bug has been fixed! extern cl::opt<bool> ANDIGlueBug; @@ -68,7 +80,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // Set up the register classes. 
addRegisterClass(MVT::i32, &PPC::GPRCRegClass); - if (!Subtarget.useSoftFloat()) { + if (!useSoftFloat()) { addRegisterClass(MVT::f32, &PPC::F4RCRegClass); addRegisterClass(MVT::f64, &PPC::F8RCRegClass); } @@ -207,14 +219,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // PowerPC does not have BSWAP, CTPOP or CTTZ setOperationAction(ISD::BSWAP, MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::BSWAP, MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); - if (Subtarget.hasPOPCNTD()) { + if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) { setOperationAction(ISD::CTPOP, MVT::i32 , Legal); setOperationAction(ISD::CTPOP, MVT::i64 , Legal); } else { @@ -255,7 +263,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); - if (Subtarget.hasDirectMove()) { + if (Subtarget.hasDirectMove() && isPPC64) { setOperationAction(ISD::BITCAST, MVT::f32, Legal); setOperationAction(ISD::BITCAST, MVT::i32, Legal); setOperationAction(ISD::BITCAST, MVT::i64, Legal); @@ -479,9 +487,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::BSWAP, VT, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); setOperationAction(ISD::ROTL, VT, Expand); @@ -557,7 +563,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); } - if (Subtarget.hasDirectMove()) { + if (Subtarget.hasDirectMove() && isPPC64) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal); @@ -647,6 +653,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); + setOperationAction(ISD::FNEG, MVT::v4f32, Legal); + setOperationAction(ISD::FNEG, MVT::v2f64, Legal); + setOperationAction(ISD::FABS, MVT::v4f32, Legal); + setOperationAction(ISD::FABS, MVT::v2f64, Legal); + addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); } @@ -654,6 +665,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass); } + if (Subtarget.hasP9Vector()) { + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); + } } if (Subtarget.hasQPX()) { @@ -840,6 +855,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::SINT_TO_FP); + setTargetDAGCombine(ISD::BUILD_VECTOR); if (Subtarget.hasFPCVT()) 
setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::LOAD); @@ -906,13 +922,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, case PPC::DIR_PWR6X: case PPC::DIR_PWR7: case PPC::DIR_PWR8: + case PPC::DIR_PWR9: setPrefFunctionAlignment(4); setPrefLoopAlignment(4); break; } - setInsertFencesForAtomic(true); - if (Subtarget.enableMachineScheduler()) setSchedulingPreference(Sched::Source); else @@ -1006,6 +1021,9 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; case PPCISD::VPERM: return "PPCISD::VPERM"; + case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; + case PPCISD::XXINSERT: return "PPCISD::XXINSERT"; + case PPCISD::VECSHL: return "PPCISD::VECSHL"; case PPCISD::CMPB: return "PPCISD::CMPB"; case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; @@ -1030,6 +1048,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::MFVSR: return "PPCISD::MFVSR"; case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; + case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP"; + case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP"; case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT"; case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT"; case PPCISD::VCMP: return "PPCISD::VCMP"; @@ -1069,6 +1089,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE"; case PPCISD::RFEBB: return "PPCISD::RFEBB"; case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; + case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; @@ -1480,6 +1501,91 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { return true; } +bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + unsigned &InsertAtByte, bool &Swap, bool IsLE) { + + // Check that the mask is shuffling words + for (unsigned i = 0; i < 4; ++i) { + unsigned B0 = N->getMaskElt(i*4); + unsigned B1 = N->getMaskElt(i*4+1); + unsigned B2 = N->getMaskElt(i*4+2); + unsigned B3 = N->getMaskElt(i*4+3); + if (B0 % 4) + return false; + if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1) + return false; + } + + // Now we look at mask elements 0,4,8,12 + unsigned M0 = N->getMaskElt(0) / 4; + unsigned M1 = N->getMaskElt(4) / 4; + unsigned M2 = N->getMaskElt(8) / 4; + unsigned M3 = N->getMaskElt(12) / 4; + unsigned LittleEndianShifts[] = { 2, 1, 0, 3 }; + unsigned BigEndianShifts[] = { 3, 0, 1, 2 }; + + // Below, let H and L be arbitrary elements of the shuffle mask + // where H is in the range [4,7] and L is in the range [0,3]. + // H, 1, 2, 3 or L, 5, 6, 7 + if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) || + (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) { + ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3]; + InsertAtByte = IsLE ? 12 : 0; + Swap = M0 < 4; + return true; + } + // 0, H, 2, 3 or 4, L, 6, 7 + if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) || + (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) { + ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3]; + InsertAtByte = IsLE ? 
8 : 4; + Swap = M1 < 4; + return true; + } + // 0, 1, H, 3 or 4, 5, L, 7 + if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) || + (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) { + ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3]; + InsertAtByte = IsLE ? 4 : 8; + Swap = M2 < 4; + return true; + } + // 0, 1, 2, H or 4, 5, 6, L + if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) || + (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) { + ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3]; + InsertAtByte = IsLE ? 0 : 12; + Swap = M3 < 4; + return true; + } + + // If both vector operands for the shuffle are the same vector, the mask will + // contain only elements from the first one and the second one will be undef. + if (N->getOperand(1).isUndef()) { + ShiftElts = 0; + Swap = true; + unsigned XXINSERTWSrcElem = IsLE ? 2 : 1; + if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) { + InsertAtByte = IsLE ? 12 : 0; + return true; + } + if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) { + InsertAtByte = IsLE ? 8 : 4; + return true; + } + if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) { + InsertAtByte = IsLE ? 4 : 8; + return true; + } + if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) { + InsertAtByte = IsLE ? 0 : 12; + return true; + } + } + + return false; +} + /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, @@ -1511,7 +1617,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { // See if all of the elements in the buildvector agree across. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (N->getOperand(i).isUndef()) continue; // If the element isn't a constant, bail fully out. if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); @@ -1557,7 +1663,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { // Check to see if this buildvec has a single non-undef value in its elements. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (N->getOperand(i).isUndef()) continue; if (!OpVal.getNode()) OpVal = N->getOperand(i); else if (OpVal != N->getOperand(i)) @@ -1950,19 +2056,16 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, // LowerOperation implementation //===----------------------------------------------------------------------===// -/// GetLabelAccessInfo - Return true if we should reference labels using a -/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags. -static bool GetLabelAccessInfo(const TargetMachine &TM, - const PPCSubtarget &Subtarget, +/// Return true if we should reference labels using a PICBase, set the HiOpFlags +/// and LoOpFlags to the target MO flags. +static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV = nullptr) { HiOpFlags = PPCII::MO_HA; LoOpFlags = PPCII::MO_LO; // Don't use the pic base if not in PIC relocation model. 
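// The MO_HA/MO_LO flags set here drive the classic PowerPC @ha/@l address
// split: the low half is later sign-extended by addi, so the high half is
// "adjusted" up by one whenever bit 15 of the address is set. A standalone
// sketch of that arithmetic (helper names are illustrative, not LLVM's):

#include <cassert>
#include <cstdint>

static uint32_t lo16(uint32_t Addr) { return Addr & 0xFFFF; }
static uint32_t ha16(uint32_t Addr) { return (Addr + 0x8000) >> 16; }

int main() {
  uint32_t Addr = 0x1234ABCDu;
  // "lis r, ha16; addi r, r, lo16" reconstructs Addr exactly, because addi
  // sign-extends its 16-bit immediate and ha16 pre-compensates for that.
  uint32_t Rebuilt = (ha16(Addr) << 16) + (int16_t)lo16(Addr);
  assert(Rebuilt == Addr);
  return 0;
}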
- bool isPIC = TM.getRelocationModel() == Reloc::PIC_; - - if (isPIC) { + if (IsPIC) { HiOpFlags |= PPCII::MO_PIC_FLAG; LoOpFlags |= PPCII::MO_PIC_FLAG; } @@ -1978,8 +2081,6 @@ static bool GetLabelAccessInfo(const TargetMachine &TM, LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; } } - - return isPIC; } static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, @@ -2010,7 +2111,7 @@ static void setUsesTOCBasePtr(SelectionDAG &DAG) { setUsesTOCBasePtr(DAG.getMachineFunction()); } -static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit, +static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, SDValue GA) { EVT VT = Is64Bit ? MVT::i64 : MVT::i32; SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : @@ -2038,10 +2139,10 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, } unsigned MOHiFlag, MOLoFlag; - bool isPIC = - GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag); + bool IsPIC = isPositionIndependent(); + getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); - if (isPIC && Subtarget.isSVR4ABI()) { + if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), PPCII::MO_PIC_FLAG); return getTOCEntry(DAG, SDLoc(CP), false, GA); @@ -2051,7 +2152,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); SDValue CPILo = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); - return LowerLabelRef(CPIHi, CPILo, isPIC, DAG); + return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG); } SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { @@ -2067,10 +2168,10 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { } unsigned MOHiFlag, MOLoFlag; - bool isPIC = - GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag); + bool IsPIC = isPositionIndependent(); + getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); - if (isPIC && Subtarget.isSVR4ABI()) { + if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, PPCII::MO_PIC_FLAG); return getTOCEntry(DAG, SDLoc(GA), false, GA); @@ -2078,7 +2179,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); - return LowerLabelRef(JTIHi, JTILo, isPIC, DAG); + return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG); } SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, @@ -2096,11 +2197,11 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, } unsigned MOHiFlag, MOLoFlag; - bool isPIC = - GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag); + bool IsPIC = isPositionIndependent(); + getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); - return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); + return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG); } SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, @@ -2160,7 +2261,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, GOTReg, TGA); } else { - if (picLevel == PICLevel::Small) + if (picLevel == PICLevel::SmallPIC) GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); else GOTPtr = 
DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); @@ -2178,7 +2279,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, GOTReg, TGA); } else { - if (picLevel == PICLevel::Small) + if (picLevel == PICLevel::SmallPIC) GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); @@ -2209,10 +2310,10 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, } unsigned MOHiFlag, MOLoFlag; - bool isPIC = - GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag, GV); + bool IsPIC = isPositionIndependent(); + getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV); - if (isPIC && Subtarget.isSVR4ABI()) { + if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), PPCII::MO_PIC_FLAG); @@ -2224,13 +2325,12 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); - SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG); + SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG); // If the global reference is actually to a non-lazy-pointer, we have to do an // extra load to get the address of the global. if (MOHiFlag & PPCII::MO_NLP_FLAG) - Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(), - false, false, false, 0); + Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); return Ptr; } @@ -2260,7 +2360,7 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { } // If we're comparing for equality to zero, expose the fact that this is - // implented as a ctlz/srl pair on ppc, so that the dag combiner can + // implemented as a ctlz/srl pair on ppc, so that the dag combiner can // fold the new nodes. 
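// The "ctlz/srl pair" mentioned above is the whole trick: cntlzw yields 32
// only for a zero input, and 32 is the only count with bit 5 set, so
// (x == 0) is exactly cntlzw(x) >> 5. A portable illustration (cntlzw is
// emulated here, since __builtin_clz(0) is undefined in C and C++):

#include <cassert>
#include <cstdint>

static unsigned cntlzw(uint32_t X) {
  unsigned N = 0;
  while (N < 32 && !(X & (0x80000000u >> N)))
    ++N;
  return N; // 0..31 for nonzero X, 32 for X == 0
}

int main() {
  assert((cntlzw(0) >> 5) == 1);           // only zero yields a count of 32
  assert((cntlzw(1) >> 5) == 0);           // cntlzw(1) == 31
  assert((cntlzw(0x80000000u) >> 5) == 0); // cntlzw == 0
  return 0;
}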
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { if (C->isNullValue() && CC == ISD::SETEQ) { @@ -2298,11 +2398,10 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget) const { +SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); EVT VT = Node->getValueType(0); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); @@ -2312,8 +2411,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, // gpr_index SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, - VAListPtr, MachinePointerInfo(SV), MVT::i8, - false, false, false, 0); + VAListPtr, MachinePointerInfo(SV), MVT::i8); InChain = GprIndex.getValue(1); if (VT == MVT::i64) { @@ -2335,8 +2433,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, // fpr SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, - FprPtr, MachinePointerInfo(SV), MVT::i8, - false, false, false, 0); + FprPtr, MachinePointerInfo(SV), MVT::i8); InChain = FprIndex.getValue(1); SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, @@ -2346,14 +2443,12 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, DAG.getConstant(4, dl, MVT::i32)); // areas - SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, - MachinePointerInfo(), false, false, - false, 0); + SDValue OverflowArea = + DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo()); InChain = OverflowArea.getValue(1); - SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, - MachinePointerInfo(), false, false, - false, 0); + SDValue RegSaveArea = + DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo()); InChain = RegSaveArea.getValue(1); // select overflow_area if index > 8 @@ -2383,8 +2478,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, VT.isInteger() ? 
VAListPtr : FprPtr, - MachinePointerInfo(SV), - MVT::i8, false, false, 0); + MachinePointerInfo(SV), MVT::i8); // determine if we should load from reg_save_area or overflow_area SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); @@ -2397,17 +2491,13 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, OverflowAreaPlusN); - InChain = DAG.getTruncStore(InChain, dl, OverflowArea, - OverflowAreaPtr, - MachinePointerInfo(), - MVT::i32, false, false, 0); + InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr, + MachinePointerInfo(), MVT::i32); - return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), - false, false, false, 0); + return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo()); } -SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget) const { +SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); // We have to copy the entire va_list struct: @@ -2431,7 +2521,7 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SDValue Nest = Op.getOperand(3); // 'nest' parameter value SDLoc dl(Op); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); @@ -2454,28 +2544,26 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, CLI.setDebugLoc(dl).setChain(Chain) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol("__trampoline_setup", PtrVT), - std::move(Args), 0); + std::move(Args)); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.second; } -SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget) const { +SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + EVT PtrVT = getPointerTy(MF.getDataLayout()); SDLoc dl(Op); if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), - MachinePointerInfo(SV), - false, false, 0); + MachinePointerInfo(SV)); } // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. 
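// For reference, the va_list that LowerVACOPY copies wholesale and that
// LowerVASTART initializes byte-by-byte below is, on 32-bit SVR4 PowerPC, a
// small record rather than a bare pointer. A sketch of its layout (field
// names are descriptive; the ABI spells them differently):

#include <cstdint>

struct SVR4VaList32 {
  uint8_t  GprCount;        // byte 0: next GPR to consume (r3..r10)
  uint8_t  FprCount;        // byte 1: next FPR to consume (f1..f8)
  uint16_t Reserved;        // padding up to a word boundary
  void    *OverflowArgArea; // arguments that spilled to the caller's stack
  void    *RegSaveArea;     // where the register arguments were dumped
};
// 12 bytes when compiled for a 32-bit target: two counter bytes, padding,
// and the two area pointers that va_arg selects between.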
@@ -2504,9 +2592,6 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); - - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); - SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), PtrVT); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), @@ -2524,35 +2609,29 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); // Store first byte : number of int regs - SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, - Op.getOperand(1), - MachinePointerInfo(SV), - MVT::i8, false, false, 0); + SDValue firstStore = + DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1), + MachinePointerInfo(SV), MVT::i8); uint64_t nextOffset = FPROffset; SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), ConstFPROffset); // Store second byte : number of float regs SDValue secondStore = - DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, - MachinePointerInfo(SV, nextOffset), MVT::i8, - false, false, 0); + DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, + MachinePointerInfo(SV, nextOffset), MVT::i8); nextOffset += StackOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); // Store second word : arguments given on stack - SDValue thirdStore = - DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, - MachinePointerInfo(SV, nextOffset), - false, false, 0); + SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, + MachinePointerInfo(SV, nextOffset)); nextOffset += FrameOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); // Store third word : arguments given in registers return DAG.getStore(thirdStore, dl, FR, nextPtr, - MachinePointerInfo(SV, nextOffset), - false, false, 0); - + MachinePointerInfo(SV, nextOffset)); } #include "PPCGenCallingConv.inc" @@ -2762,14 +2841,10 @@ static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, return NumBytes; } -SDValue -PPCTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> - &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) - const { +SDValue PPCTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { if (Subtarget.isSVR4ABI()) { if (Subtarget.isPPC64()) return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, @@ -2783,14 +2858,10 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain, } } -SDValue -PPCTargetLowering::LowerFormalArguments_32SVR4( - SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> - &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { +SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { // 32-bit SVR4 ABI Stack Frame Layout: // +-----------------------------------+ @@ -2825,7 +2896,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( MachineFrameInfo *MFI = MF.getFrameInfo(); 
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); + EVT PtrVT = getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); @@ -2833,14 +2904,17 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); CCInfo.AllocateStack(LinkageSize, PtrByteSize); + if (useSoftFloat()) + CCInfo.PreAnalyzeFormalArguments(Ins); CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); + CCInfo.clearWasPPCF128(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -2908,9 +2982,8 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, - MachinePointerInfo(), - false, false, false, 0)); + InVals.push_back( + DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo())); } } @@ -2955,7 +3028,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( }; unsigned NumFPArgRegs = array_lengthof(FPArgRegs); - if (Subtarget.useSoftFloat()) + if (useSoftFloat()) NumFPArgRegs = 0; FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); @@ -2973,8 +3046,8 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // The fixed integer arguments of a variadic function are stored to the - // VarArgsFrameIndex on the stack so that they may be loaded by deferencing - // the result of va_next. + // VarArgsFrameIndex on the stack so that they may be loaded by + // dereferencing the result of va_next. for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { // Get an existing live-in vreg, or add a new one. 
unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); @@ -2982,8 +3055,8 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(), false, false, 0); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); @@ -3001,8 +3074,8 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(), false, false, 0); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl, @@ -3019,10 +3092,10 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. -SDValue -PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, - SelectionDAG &DAG, SDValue ArgVal, - SDLoc dl) const { +SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, + EVT ObjectVT, SelectionDAG &DAG, + SDValue ArgVal, + const SDLoc &dl) const { if (Flags.isSExt()) ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); @@ -3033,14 +3106,10 @@ PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); } -SDValue -PPCTargetLowering::LowerFormalArguments_64SVR4( - SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> - &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { +SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // bool isELFv2ABI = Subtarget.isELFv2ABI(); @@ -3052,7 +3121,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( assert(!(CallConv == CallingConv::Fast && isVarArg) && "fastcc not supported on varargs functions"); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); + EVT PtrVT = getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); @@ -3199,15 +3268,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( EVT ObjType = (ObjSize == 1 ? MVT::i8 : (ObjSize == 2 ? MVT::i16 : MVT::i32)); Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, - MachinePointerInfo(&*FuncArg), ObjType, - false, false, 0); + MachinePointerInfo(&*FuncArg), ObjType); } else { // For sizes that don't fit a truncating store (3, 5, 6, 7), // store the whole register as-is to the parameter save area // slot. 
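// The size list in the comment above falls straight out of the available
// store widths: a 1-, 2- or 4-byte tail fits one truncating store, while 3,
// 5, 6 and 7 bytes do not, so the whole 8-byte register is spilled and the
// callee simply reads the bytes that belong to it. A small sketch of the
// decision (illustrative helper, not an LLVM API):

static unsigned truncStoreWidthInBytes(unsigned ObjSize) {
  switch (ObjSize) {
  case 1: case 2: case 4:
    return ObjSize; // a single i8/i16/i32 truncating store suffices
  default:
    return 0;       // 3, 5, 6, 7: store the full register instead
  }
}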
- Store = - DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(&*FuncArg), false, false, 0); + Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(&*FuncArg)); } MemOps.push_back(Store); @@ -3234,9 +3301,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue Off = DAG.getConstant(j, dl, PtrVT); Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); } - SDValue Store = - DAG.getStore(Val.getValue(1), dl, Val, Addr, - MachinePointerInfo(&*FuncArg, j), false, false, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, + MachinePointerInfo(&*FuncArg, j)); MemOps.push_back(Store); ++GPR_idx; } @@ -3402,8 +3468,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( CurArgOffset += ArgSize - ObjSize; int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), - false, false, false, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); } InVals.push_back(ArgVal); @@ -3434,14 +3499,14 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // If this function is vararg, store any remaining integer argument regs - // to their spots on the stack so that they may be loaded by deferencing the - // result of va_next. + // to their spots on the stack so that they may be loaded by dereferencing + // the result of va_next. for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; GPR_idx < Num_GPR_Regs; ++GPR_idx) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(), false, false, 0); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT); @@ -3455,21 +3520,17 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( return Chain; } -SDValue -PPCTargetLowering::LowerFormalArguments_Darwin( - SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> - &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { +SDValue PPCTargetLowering::LowerFormalArguments_Darwin( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); + EVT PtrVT = getPointerTy(MF.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && @@ -3613,9 +3674,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin( VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); EVT ObjType = ObjSize == 1 ? 
MVT::i8 : MVT::i16; - SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(&*FuncArg), - ObjType, false, false, 0); + SDValue Store = + DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(&*FuncArg), ObjType); MemOps.push_back(Store); ++GPR_idx; } @@ -3637,9 +3698,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = - DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(&*FuncArg, j), false, false, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(&*FuncArg, j)); MemOps.push_back(Store); ++GPR_idx; ArgOffset += PtrByteSize; @@ -3760,8 +3820,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( CurArgOffset + (ArgSize - ObjSize), isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), - false, false, false, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); } InVals.push_back(ArgVal); @@ -3795,8 +3854,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // If this function is vararg, store any remaining integer argument regs - to their spots on the stack so that they may be loaded by deferencing the - result of va_next. + to their spots on the stack so that they may be loaded by dereferencing + the result of va_next. for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { unsigned VReg; @@ -3806,8 +3865,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(), false, false, 0); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); @@ -3838,6 +3897,176 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, return SPDiff; } +static bool isFunctionGlobalAddress(SDValue Callee); + +static bool +resideInSameModule(SDValue Callee, Reloc::Model RelMod) { + // If !G, Callee can be an external symbol. + GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); + if (!G) return false; + + const GlobalValue *GV = G->getGlobal(); + + if (GV->isDeclaration()) return false; + + switch(GV->getLinkage()) { + default: llvm_unreachable("unknown linkage type"); + case GlobalValue::AvailableExternallyLinkage: + case GlobalValue::ExternalWeakLinkage: + return false; + + // Callee with weak linkage is allowed if it has hidden or protected + // visibility + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: // e.g. c++ template instantiation + if (GV->hasDefaultVisibility()) + return false; + + case GlobalValue::ExternalLinkage: + case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: + break; + } + + // With '-fPIC', a call to a default-visibility function needs a 'nop' inserted + // after the call, whether or not that function resides in the same module, so + // we treat it as residing in a different module.
+ if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility()) + return false; + + return true; +} + +static bool +needStackSlotPassParameters(const PPCSubtarget &Subtarget, + const SmallVectorImpl<ISD::OutputArg> &Outs) { + assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64()); + + const unsigned PtrByteSize = 8; + const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); + + static const MCPhysReg GPR[] = { + PPC::X3, PPC::X4, PPC::X5, PPC::X6, + PPC::X7, PPC::X8, PPC::X9, PPC::X10, + }; + static const MCPhysReg VR[] = { + PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, + PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 + }; + + const unsigned NumGPRs = array_lengthof(GPR); + const unsigned NumFPRs = 13; + const unsigned NumVRs = array_lengthof(VR); + const unsigned ParamAreaSize = NumGPRs * PtrByteSize; + + unsigned NumBytes = LinkageSize; + unsigned AvailableFPRs = NumFPRs; + unsigned AvailableVRs = NumVRs; + + for (const ISD::OutputArg& Param : Outs) { + if (Param.Flags.isNest()) continue; + + if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, + PtrByteSize, LinkageSize, ParamAreaSize, + NumBytes, AvailableFPRs, AvailableVRs, + Subtarget.hasQPX())) + return true; + } + return false; +} + +static bool +hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) { + if (CS->arg_size() != CallerFn->getArgumentList().size()) + return false; + + ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin(); + ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end(); + Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin(); + + for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) { + const Value* CalleeArg = *CalleeArgIter; + const Value* CallerArg = &(*CallerArgIter); + if (CalleeArg == CallerArg) + continue; + + // e.g. @caller([4 x i64] %a, [4 x i64] %b) { + // tail call @callee([4 x i64] undef, [4 x i64] %b) + // } + // The first argument of the callee is undef and has the same type as the + // caller's. + if (CalleeArg->getType() == CallerArg->getType() && + isa<UndefValue>(CalleeArg)) + continue; + + return false; + } + + return true; +} + +bool +PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( + SDValue Callee, + CallingConv::ID CalleeCC, + ImmutableCallSite *CS, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const { + bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; + + if (DisableSCO && !TailCallOpt) return false; + + // Variadic argument functions are not supported. + if (isVarArg) return false; + + MachineFunction &MF = DAG.getMachineFunction(); + CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); + + // Tail or sibling call optimization (TCO/SCO) requires the callee and caller + // to have the same calling convention. + if (CallerCC != CalleeCC) return false; + + // SCO supports the C calling convention. + if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C) + return false; + + // Functions containing byval parameters are not supported. + if (std::any_of(Ins.begin(), Ins.end(), + [](const ISD::InputArg& IA) { return IA.Flags.isByVal(); })) + return false; + + // No TCO/SCO on indirect calls because the caller has to restore its TOC. + if (!isFunctionGlobalAddress(Callee) && + !isa<ExternalSymbolSDNode>(Callee)) + return false; + + // Check if the callee resides in the same module, because for now, PPC64 SVR4 + // ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides in + // another module.
+ // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977 + if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel())) + return false; + + // TCO allows altering the callee's ABI, so we don't have to check further. + if (CalleeCC == CallingConv::Fast && TailCallOpt) + return true; + + if (DisableSCO) return false; + + // If the callee uses the same argument list as the caller, we can apply SCO + // in this case. If not, we need to check whether the callee needs stack slots + // for passing arguments. + if (!hasSameArgumentList(MF.getFunction(), CS) && + needStackSlotPassParameters(Subtarget, Outs)) { + return false; + } + + return true; +} + /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. @@ -3888,9 +4117,11 @@ static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { SignExtend32<26>(Addr) != Addr) return nullptr; // Top 6 bits have to be sext of immediate. - return DAG.getConstant((int)C->getZExtValue() >> 2, SDLoc(Op), - DAG.getTargetLoweringInfo().getPointerTy( - DAG.getDataLayout())).getNode(); + return DAG + .getConstant( + (int)C->getZExtValue() >> 2, SDLoc(Op), + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())) + .getNode(); } namespace { struct TailCallArgumentInfo { } /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. -static void -StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, - SDValue Chain, - const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs, - SmallVectorImpl<SDValue> &MemOpChains, - SDLoc dl) { +static void StoreTailCallArgumentsToStackSlot( + SelectionDAG &DAG, SDValue Chain, + const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs, + SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) { for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { SDValue Arg = TailCallArgs[i].Arg; SDValue FIN = TailCallArgs[i].FrameIdxOp; @@ -3918,48 +4147,40 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, // Store relative to framepointer. MemOpChains.push_back(DAG.getStore( Chain, dl, Arg, FIN, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false, - false, 0)); + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); } } /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to /// the appropriate stack slot for the tail call optimized function call. -static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, - MachineFunction &MF, - SDValue Chain, - SDValue OldRetAddr, - SDValue OldFP, - int SPDiff, - bool isPPC64, - bool isDarwinABI, - SDLoc dl) { +static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, + SDValue OldRetAddr, SDValue OldFP, + int SPDiff, const SDLoc &dl) { if (SPDiff) { // Calculate the new stack slot for the return address. + MachineFunction &MF = DAG.getMachineFunction(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); + const PPCFrameLowering *FL = Subtarget.getFrameLowering(); + bool isPPC64 = Subtarget.isPPC64(); int SlotSize = isPPC64 ? 8 : 4; - const PPCFrameLowering *FL = - MF.getSubtarget<PPCSubtarget>().getFrameLowering(); int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset(); int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewRetAddrLoc, true); EVT VT = isPPC64 ?
MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); - Chain = DAG.getStore( - Chain, dl, OldRetAddr, NewRetAddrFrIdx, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewRetAddr), - false, false, 0); + Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, + MachinePointerInfo::getFixedStack(MF, NewRetAddr)); // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack // slot as the FP is never overwritten. - if (isDarwinABI) { + if (Subtarget.isDarwinABI()) { int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset(); int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, true); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); - Chain = DAG.getStore( - Chain, dl, OldFP, NewFramePtrIdx, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewFPIdx), - false, false, 0); + Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), NewFPIdx)); } } return Chain; @@ -3986,27 +4207,21 @@ CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address /// stack slot. Returns the chain as result and the loaded frame pointers in /// LROpOut/FPOpout. Used when tail calling. -SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, - int SPDiff, - SDValue Chain, - SDValue &LROpOut, - SDValue &FPOpOut, - bool isDarwinABI, - SDLoc dl) const { +SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr( + SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut, + SDValue &FPOpOut, const SDLoc &dl) const { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); - LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), - false, false, false, 0); + LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo()); Chain = SDValue(LROpOut.getNode(), 1); // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack // slot as the FP is never overwritten. - if (isDarwinABI) { + if (Subtarget.isDarwinABI()) { FPOpOut = getFramePointerFrameIndex(DAG); - FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(), - false, false, false, 0); + FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo()); Chain = SDValue(FPOpOut.getNode(), 1); } } @@ -4019,10 +4234,9 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, /// a byval function parameter. /// Sometimes what we are copying is the end of a larger object, the part that /// does not fit in registers. -static SDValue -CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, - ISD::ArgFlagsTy Flags, SelectionDAG &DAG, - SDLoc dl) { +static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, + SDValue Chain, ISD::ArgFlagsTy Flags, + SelectionDAG &DAG, const SDLoc &dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), false, false, false, MachinePointerInfo(), @@ -4031,13 +4245,11 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of /// tail calls. 
-static void -LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, - SDValue Arg, SDValue PtrOff, int SPDiff, - unsigned ArgOffset, bool isPPC64, bool isTailCall, - bool isVector, SmallVectorImpl<SDValue> &MemOpChains, - SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, - SDLoc dl) { +static void LowerMemOpCallTo( + SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, + SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, + bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains, + SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); if (!isTailCall) { if (isVector) { @@ -4049,20 +4261,18 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, dl, PtrVT)); } - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo(), false, false, 0)); - // Calculate and remember argument location. + MemOpChains.push_back( + DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); + // Calculate and remember argument location. } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, TailCallArguments); } -static -void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, - SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, - SDValue LROp, SDValue FPOp, bool isDarwinABI, - SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) { - MachineFunction &MF = DAG.getMachineFunction(); - +static void +PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, + const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, + SDValue FPOp, + SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) { // Emit a sequence of copyto/copyfrom virtual registers for arguments that // might overwrite each other in case of tail call optimization. SmallVector<SDValue, 8> MemOpChains2; @@ -4074,8 +4284,7 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); // Store the return address to the appropriate stack slot. - Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, - isPPC64, isDarwinABI, dl); + Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl); // Emit callseq_end just before tailcall node. 
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), @@ -4091,19 +4300,19 @@ static bool isFunctionGlobalAddress(SDValue Callee) { Callee.getOpcode() == ISD::TargetGlobalTLSAddress) return false; - return G->getGlobal()->getType()->getElementType()->isFunctionTy(); + return G->getGlobal()->getValueType()->isFunctionTy(); } return false; } -static -unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, - SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff, - bool isTailCall, bool IsPatchPoint, bool hasNest, - SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass, - SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, - ImmutableCallSite *CS, const PPCSubtarget &Subtarget) { +static unsigned +PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, + SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall, + bool isPatchPoint, bool hasNest, + SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, + SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, + ImmutableCallSite *CS, const PPCSubtarget &Subtarget) { bool isPPC64 = Subtarget.isPPC64(); bool isSVR4ABI = Subtarget.isSVR4ABI(); @@ -4123,23 +4332,24 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, needIndirectCall = false; } + // PC-relative references to external symbols should go through $stub, unless + // we're building with the leopard linker or later, which automatically + // synthesizes these stubs. + const TargetMachine &TM = DAG.getTarget(); + const Module *Mod = DAG.getMachineFunction().getFunction()->getParent(); + const GlobalValue *GV = nullptr; + if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) + GV = G->getGlobal(); + bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); + bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64; + if (isFunctionGlobalAddress(Callee)) { GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); // A call to a TLS address is actually an indirect call to a // thread-specific pointer. unsigned OpFlags = 0; - if ((DAG.getTarget().getRelocationModel() != Reloc::Static && - (Subtarget.getTargetTriple().isMacOSX() && - Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && - !G->getGlobal()->isStrongDefinitionForLinker()) || - (Subtarget.isTargetELF() && !isPPC64 && - !G->getGlobal()->hasLocalLinkage() && - DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { - // PC-relative references to external symbols should go through $stub, - // unless we're building with the leopard linker or later, which - // automatically synthesizes these stubs. - OpFlags = PPCII::MO_PLT_OR_STUB; - } + if (UsePlt) + OpFlags = PPCII::MO_PLT; // If the callee is a GlobalAddress/ExternalSymbol node (quite common, // every direct call is) turn it into a TargetGlobalAddress / @@ -4152,23 +4362,15 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { unsigned char OpFlags = 0; - if ((DAG.getTarget().getRelocationModel() != Reloc::Static && - (Subtarget.getTargetTriple().isMacOSX() && - Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) || - (Subtarget.isTargetELF() && !isPPC64 && - DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { - // PC-relative references to external symbols should go through $stub, - // unless we're building with the leopard linker or later, which - // automatically synthesizes these stubs. 
- OpFlags = PPCII::MO_PLT_OR_STUB; - } + if (UsePlt) + OpFlags = PPCII::MO_PLT; Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), OpFlags); needIndirectCall = false; } - if (IsPatchPoint) { + if (isPatchPoint) { // We'll form an invalid direct call when lowering a patchpoint; the full // sequence for an indirect call is complicated, and many of the // instructions introduced might have side effects (and, thus, can't be @@ -4217,24 +4419,26 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, if (LDChain.getValueType() == MVT::Glue) LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2); - bool LoadsInv = Subtarget.hasInvariantFunctionDescriptors(); + auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() + ? MachineMemOperand::MOInvariant + : MachineMemOperand::MONone; MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr); SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI, - false, false, LoadsInv, 8); + /* Alignment = */ 8, MMOFlags); // Load environment pointer into r11. SDValue PtrOff = DAG.getIntPtrConstant(16, dl); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); - SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, - MPI.getWithOffset(16), false, false, - LoadsInv, 8); + SDValue LoadEnvPtr = + DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16), + /* Alignment = */ 8, MMOFlags); SDValue TOCOff = DAG.getIntPtrConstant(8, dl); SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); - SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, - MPI.getWithOffset(8), false, false, - LoadsInv, 8); + SDValue TOCPtr = + DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8), + /* Alignment = */ 8, MMOFlags); setUsesTOCBasePtr(DAG); SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, @@ -4292,7 +4496,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live // into the call. 
- if (isSVR4ABI && isPPC64 && !IsPatchPoint) { + if (isSVR4ABI && isPPC64 && !isPatchPoint) { setUsesTOCBasePtr(DAG); Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); } @@ -4308,12 +4512,10 @@ bool isLocalCall(const SDValue &Callee) return false; } -SDValue -PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { +SDValue PPCTargetLowering::LowerCallResult( + SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, @@ -4354,23 +4556,18 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, return Chain; } -SDValue -PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, - bool isTailCall, bool isVarArg, bool IsPatchPoint, - bool hasNest, SelectionDAG &DAG, - SmallVector<std::pair<unsigned, SDValue>, 8> - &RegsToPass, - SDValue InFlag, SDValue Chain, - SDValue CallSeqStart, SDValue &Callee, - int SPDiff, unsigned NumBytes, - const SmallVectorImpl<ISD::InputArg> &Ins, - SmallVectorImpl<SDValue> &InVals, - ImmutableCallSite *CS) const { +SDValue PPCTargetLowering::FinishCall( + CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg, + bool isPatchPoint, bool hasNest, SelectionDAG &DAG, + SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag, + SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, + unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, + SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const { std::vector<EVT> NodeTys; SmallVector<SDValue, 8> Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, - SPDiff, isTailCall, IsPatchPoint, hasNest, + SPDiff, isTailCall, isPatchPoint, hasNest, RegsToPass, Ops, NodeTys, CS, Subtarget); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls @@ -4417,7 +4614,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, // same TOC), the NOP will remain unchanged. if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() && - !IsPatchPoint) { + !isPatchPoint) { if (CallOpc == PPCISD::BCTRL) { // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. @@ -4430,7 +4627,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, // allocated and an unnecessary move instruction being generated. 
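// Background for the BCTRL/TOC handling here: on 64-bit ELF, r2 is the TOC
// pointer, and an indirect call may land in code that uses a different TOC,
// so the caller keeps its own copy in the TOC save slot (r1+40 under ELFv1,
// r1+24 under ELFv2; see getTOCSaveOffset()) and reloads it once the call
// returns. Roughly, in assembly:
//
//   std   r2, TOC_SAVE_OFFSET(r1)   # stash the caller's TOC
//   mtctr r12                       # function entry address
//   bctrl                           # indirect call; callee may clobber r2
//   ld    r2, TOC_SAVE_OFFSET(r1)   # restore; BCTRL_LOAD_TOC glues this
//                                   # reload to the call node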
CallOpc = PPCISD::BCTRL_LOAD_TOC; - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); @@ -4472,12 +4669,35 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool &isTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; - bool IsPatchPoint = CLI.IsPatchPoint; + bool isPatchPoint = CLI.IsPatchPoint; ImmutableCallSite *CS = CLI.CS; - if (isTailCall) - isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, - Ins, DAG); + if (isTailCall) { + if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) + isTailCall = + IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS, + isVarArg, Outs, Ins, DAG); + else + isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, + Ins, DAG); + if (isTailCall) { + ++NumTailCalls; + if (!getTargetMachine().Options.GuaranteedTailCallOpt) + ++NumSiblingCalls; + + assert(isa<GlobalAddressSDNode>(Callee) && + "Callee should be an llvm::Function object."); + DEBUG( + const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); + const unsigned Width = 80 - strlen("TCO caller: ") + - strlen(", callee linkage: 0, 0"); + dbgs() << "TCO caller: " + << left_justify(DAG.getMachineFunction().getName(), Width) + << ", callee linkage: " + << GV->getVisibility() << ", " << GV->getLinkage() << "\n" + ); + } + } if (!isTailCall && CS && CS->isMustTailCall()) report_fatal_error("failed to perform tail call elimination on a call " @@ -4486,29 +4706,27 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Subtarget.isSVR4ABI()) { if (Subtarget.isPPC64()) return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, - isTailCall, IsPatchPoint, Outs, OutVals, Ins, + isTailCall, isPatchPoint, Outs, OutVals, Ins, dl, DAG, InVals, CS); else return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, - isTailCall, IsPatchPoint, Outs, OutVals, Ins, + isTailCall, isPatchPoint, Outs, OutVals, Ins, dl, DAG, InVals, CS); } return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, - isTailCall, IsPatchPoint, Outs, OutVals, Ins, + isTailCall, isPatchPoint, Outs, OutVals, Ins, dl, DAG, InVals, CS); } -SDValue -PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, bool IsPatchPoint, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals, - ImmutableCallSite *CS) const { +SDValue PPCTargetLowering::LowerCall_32SVR4( + SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, bool isPatchPoint, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, + ImmutableCallSite *CS) const { // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description // of the 32-bit SVR4 ABI stack frame layout. @@ -4534,12 +4752,13 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, // Assign locations to all of the outgoing arguments. 
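The statistics above split tail calls into two buckets: NumSiblingCalls counts those taken without GuaranteedTailCallOpt, where the caller's frame must be reused as-is. Restated as a predicate (a sketch mirroring the IsSibCall flag set later in LowerCall_64SVR4):

    // A sibling call is a tail call made with no caller stack adjustment,
    // i.e. when GuaranteedTailCallOpt is off.
    bool isSiblingCall(bool IsTailCall, bool GuaranteedTailCallOpt) {
      return IsTailCall && !GuaranteedTailCallOpt;
    }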
SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, - *DAG.getContext()); + PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(), PtrByteSize); + if (useSoftFloat()) + CCInfo.PreAnalyzeCallOperands(Outs); if (isVarArg) { // Handle fixed and variable vector arguments differently. @@ -4572,11 +4791,11 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, // All arguments are treated the same. CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); } + CCInfo.clearWasPPCF128(); // Assign locations to all of the outgoing aggregate by value arguments. SmallVector<CCValAssign, 16> ByValArgLocs; - CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), - ByValArgLocs, *DAG.getContext()); + CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext()); // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); @@ -4601,8 +4820,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, // Load the return address and frame pointer so it can be moved somewhere else // later. SDValue LROp, FPOp; - Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false, - dl); + Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument @@ -4676,9 +4894,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo(), - false, false, 0)); + MemOpChains.push_back( + DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); } else { // Calculate and remember argument location. CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, @@ -4712,10 +4929,10 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, } if (isTailCall) - PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, - false, TailCallArguments); + PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, + TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, + return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CS); @@ -4723,12 +4940,9 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, // Copy an argument into memory, being careful to do this outside the // call sequence for the call to which the argument belongs. 
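The AllocateStack call above reserves the linkage area before any argument gets a location, so the first stack-passed argument starts past it. A back-of-envelope sketch with the 32-bit SVR4 sizes (taken from the ABI, not from this diff):

    constexpr unsigned kPtrByteSize32 = 4;
    constexpr unsigned kLinkageSize32 = 8; // back-chain word + LR save word
    constexpr unsigned firstArgOffset32() { return kLinkageSize32; }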
-SDValue -PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, - SDValue CallSeqStart, - ISD::ArgFlagsTy Flags, - SelectionDAG &DAG, - SDLoc dl) const { +SDValue PPCTargetLowering::createMemcpyOutsideCallSeq( + SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags, + SelectionDAG &DAG, const SDLoc &dl) const { SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, CallSeqStart.getNode()->getOperand(0), Flags, DAG, dl); @@ -4741,27 +4955,29 @@ PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, return NewCallSeqStart; } -SDValue -PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, bool IsPatchPoint, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals, - ImmutableCallSite *CS) const { +SDValue PPCTargetLowering::LowerCall_64SVR4( + SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, bool isPatchPoint, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, + ImmutableCallSite *CS) const { bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); unsigned NumOps = Outs.size(); bool hasNest = false; + bool IsSibCall = false; - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); unsigned PtrByteSize = 8; MachineFunction &MF = DAG.getMachineFunction(); + if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt) + IsSibCall = true; + // Mark this function as potentially containing a function that contains a // tail call. As a consequence the frame pointer will be used for dynamicalloc // and restoring the callers stack pointer in this functions epilog. This is @@ -4881,9 +5097,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallConv == CallingConv::Fast) NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); + int SPDiff = 0; + // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. - int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); + if (!IsSibCall) + SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); // To protect arguments on the stack from being clobbered in a tail call, // force all the loads to happen before doing any other lowering. @@ -4892,15 +5111,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), - dl); + if (!IsSibCall) + Chain = DAG.getCALLSEQ_START(Chain, + DAG.getIntPtrConstant(NumBytes, dl, true), dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be move somewhere else // later. 
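For fastcc calls the code above rounds NumBytes up with EnsureStackAlignment before computing SPDiff. A sketch of that round-up, assuming the 16-byte stack alignment the 64-bit ELF ABI requires (the helper name is illustrative):

    unsigned alignTo16(unsigned NumBytes) {
      return (NumBytes + 15u) & ~15u; // round up to a quadword boundary
    }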
SDValue LROp, FPOp; - Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, - dl); + Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument @@ -4980,8 +5199,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, - MachinePointerInfo(), VT, - false, false, false, 0); + MachinePointerInfo(), VT); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); @@ -5041,9 +5259,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, Flags, DAG, dl); // Load the slot into the register. - SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff, - MachinePointerInfo(), - false, false, false, 0); + SDValue Load = + DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); @@ -5058,9 +5275,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, - MachinePointerInfo(), - false, false, false, 0); + SDValue Load = + DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); ArgOffset += PtrByteSize; @@ -5214,13 +5430,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (isVarArg) { // We could elide this store in the case where the object fits // entirely in R registers. Maybe later. - SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo(), false, false, 0); + SDValue Store = + DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Store); if (VR_idx != NumVRs) { - SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, - MachinePointerInfo(), - false, false, false, 0); + SDValue Load = + DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || @@ -5236,8 +5451,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, break; SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, DAG.getConstant(i, dl, PtrVT)); - SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), - false, false, false, 0); + SDValue Load = + DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -5278,13 +5493,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (isVarArg) { // We could elide this store in the case where the object fits // entirely in R registers. Maybe later. - SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo(), false, false, 0); + SDValue Store = + DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Store); if (QFPR_idx != NumQFPRs) { - SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, - Store, PtrOff, MachinePointerInfo(), - false, false, false, 0); + SDValue Load = DAG.getLoad(IsF32 ? 
MVT::v4f32 : MVT::v4f64, dl, Store, + PtrOff, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load)); } @@ -5294,8 +5508,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, break; SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, DAG.getConstant(i, dl, PtrVT)); - SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), - false, false, false, 0); + SDValue Load = + DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -5332,7 +5546,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // Check if this is an indirect call (MTCTR/BCTRL). // See PrepareCall() for more information about calls through function // pointers in the 64-bit SVR4 ABI. - if (!isTailCall && !IsPatchPoint && + if (!isTailCall && !isPatchPoint && !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee)) { // Load r2 into a virtual register and store it to the TOC save area. @@ -5344,12 +5558,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); Chain = DAG.getStore( Val.getValue(1), dl, Val, AddPtr, - MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset), - false, false, 0); + MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset)); // In the ELFv2 ABI, R12 must contain the address of an indirect callee. // This does not mean the MTCTR instruction must use R12; it's easier // to model this as an extra parameter, so do that. - if (isELFv2ABI && !IsPatchPoint) + if (isELFv2ABI && !isPatchPoint) RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); } @@ -5362,29 +5575,27 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); } - if (isTailCall) - PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, - FPOp, true, TailCallArguments); + if (isTailCall && !IsSibCall) + PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, + TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, hasNest, + return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CS); } -SDValue -PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, bool IsPatchPoint, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals, - ImmutableCallSite *CS) const { +SDValue PPCTargetLowering::LowerCall_Darwin( + SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, bool isPatchPoint, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, + ImmutableCallSite *CS) const { unsigned NumOps = Outs.size(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; unsigned PtrByteSize = isPPC64 ? 
8 : 4; @@ -5467,8 +5678,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // Load the return address and frame pointer so it can be move somewhere else // later. SDValue LROp, FPOp; - Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, - dl); + Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument @@ -5538,8 +5748,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, EVT VT = (Size==1) ? MVT::i8 : MVT::i16; if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, - MachinePointerInfo(), VT, - false, false, false, 0); + MachinePointerInfo(), VT); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); @@ -5569,9 +5778,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, - MachinePointerInfo(), - false, false, false, 0); + SDValue Load = + DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); ArgOffset += PtrByteSize; @@ -5606,24 +5814,22 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); if (isVarArg) { - SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo(), false, false, 0); + SDValue Store = + DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Store); // Float varargs are always shadowed in available integer registers if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, - MachinePointerInfo(), false, false, - false, 0); + SDValue Load = + DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); - SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, - MachinePointerInfo(), - false, false, false, 0); + SDValue Load = + DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -5665,13 +5871,12 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // entirely in R registers. Maybe later. 
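The shadowing above stores a float vararg once and then reloads the same bytes through integer registers, since a varargs callee may fetch the value from GPRs; for an f64 on a 32-bit target that means two word loads, the second at offset +4. A scalar sketch of the split (hypothetical helper, not LLVM API):

    #include <cstdint>
    #include <cstring>

    struct ShadowedF64 { std::uint32_t Word0, Word1; };

    // Word0 is the word at the lower address, i.e. the high half on a
    // big-endian Darwin/PPC target.
    ShadowedF64 shadowDouble(double D) {
      ShadowedF64 S;
      std::memcpy(&S, &D, sizeof D);
      return S;
    }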
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, dl, PtrVT)); - SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo(), false, false, 0); + SDValue Store = + DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Store); if (VR_idx != NumVRs) { - SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, - MachinePointerInfo(), - false, false, false, 0); + SDValue Load = + DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); } @@ -5681,8 +5886,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, break; SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, DAG.getConstant(i, dl, PtrVT)); - SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), - false, false, false, 0); + SDValue Load = + DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -5754,10 +5959,10 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, } if (isTailCall) - PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, - FPOp, true, TailCallArguments); + PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, + TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, + return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CS); @@ -5774,11 +5979,11 @@ PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, } SDValue -PPCTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, +PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - SDLoc dl, SelectionDAG &DAG) const { + const SDLoc &dl, SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, @@ -5814,6 +6019,25 @@ PPCTargetLowering::LowerReturn(SDValue Chain, RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } + const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const MCPhysReg *I = + TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); + if (I) { + for (; *I; ++I) { + + if (PPC::G8RCRegClass.contains(*I)) + RetOps.push_back(DAG.getRegister(*I, MVT::i64)); + else if (PPC::F8RCRegClass.contains(*I)) + RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); + else if (PPC::CRRCRegClass.contains(*I)) + RetOps.push_back(DAG.getRegister(*I, MVT::i1)); + else if (PPC::VRRCRegClass.contains(*I)) + RetOps.push_back(DAG.getRegister(*I, MVT::Other)); + else + llvm_unreachable("Unexpected register class in CSRsViaCopy!"); + } + } + RetOps[0] = Chain; // Update chain. // Add the flag if we have it. @@ -5823,8 +6047,9 @@ PPCTargetLowering::LowerReturn(SDValue Chain, return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps); } -SDValue PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET( - SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const { +SDValue +PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, + SelectionDAG &DAG) const { SDLoc dl(Op); // Get the corect type for integers. 
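The new LowerReturn loop above appends any callee-saved registers that are returned "via copy", choosing a value type per register class. Condensed as a table (a sketch; the enum is illustrative):

    enum class CSRClass { G8RC, F8RC, CRRC, VRRC };

    const char *csrCopyValueType(CSRClass C) {
      switch (C) {
      case CSRClass::G8RC: return "i64";
      case CSRClass::F8RC: return "f64";   // MVT::getFloatingPointVT(64)
      case CSRClass::CRRC: return "i1";
      case CSRClass::VRRC: return "Other"; // placeholder VT in this revision
      }
      return "unexpected"; // parallels the llvm_unreachable above
    }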
@@ -5839,13 +6064,13 @@ SDValue PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET( return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops); } -SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget) const { +SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, + SelectionDAG &DAG) const { // When we pop the dynamic allocation we need to restore the SP link. SDLoc dl(Op); // Get the corect type for pointers. - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Construct the stack pointer operand. bool isPPC64 = Subtarget.isPPC64(); @@ -5857,22 +6082,20 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, SDValue SaveSP = Op.getOperand(1); // Load the old link SP. - SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, - MachinePointerInfo(), - false, false, false, 0); + SDValue LoadLinkSP = + DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo()); // Restore the stack pointer. Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); // Store the old link SP. - return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(), - false, false, 0); + return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo()); } SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); + EVT PtrVT = getPointerTy(MF.getDataLayout()); // Get current frame pointer save index. The users of this index will be // primarily DYNALLOC instructions. @@ -5895,7 +6118,7 @@ SDValue PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); + EVT PtrVT = getPointerTy(MF.getDataLayout()); // Get current frame pointer save index. The users of this index will be // primarily DYNALLOC instructions. @@ -5915,15 +6138,14 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { } SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, - SelectionDAG &DAG, - const PPCSubtarget &Subtarget) const { + SelectionDAG &DAG) const { // Get the inputs. SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); SDLoc dl(Op); // Get the corect type for pointers. - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Negate the size. SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, DAG.getConstant(0, dl, PtrVT), Size); @@ -6113,7 +6335,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, SelectionDAG &DAG, - SDLoc dl) const { + const SDLoc &dl) const { assert(Op.getOperand(0).getValueType().isFloatingPoint()); SDValue Src = Op.getOperand(0); if (Src.getValueType() == MVT::f32) @@ -6156,15 +6378,14 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); } else - Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, - MPI, false, false, 0); + Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI); // Result is a load from the stack slot. 
If loading 4 bytes, make sure to - // add in a bias. + // add in a bias on big endian. if (Op.getValueType() == MVT::i32 && !i32Stack) { FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, dl, FIPtr.getValueType())); - MPI = MPI.getWithOffset(4); + MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4); } RLI.Chain = Chain; @@ -6177,7 +6398,7 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, /// need for load/store combinations. SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, - SDLoc dl) const { + const SDLoc &dl) const { assert(Op.getOperand(0).getValueType().isFloatingPoint()); SDValue Src = Op.getOperand(0); @@ -6208,16 +6429,18 @@ SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, } SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, - SDLoc dl) const { + const SDLoc &dl) const { if (Subtarget.hasDirectMove() && Subtarget.isPPC64()) return LowerFP_TO_INTDirectMove(Op, DAG, dl); ReuseLoadInfo RLI; LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); - return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false, - false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo, - RLI.Ranges); + return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, + RLI.Alignment, + RLI.IsInvariant ? MachineMemOperand::MOInvariant + : MachineMemOperand::MONone, + RLI.AAInfo, RLI.Ranges); } // We're trying to insert a regular store, S, and then a load, L. If the @@ -6251,7 +6474,7 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, return false; RLI.Ptr = LD->getBasePtr(); - if (LD->isIndexed() && LD->getOffset().getOpcode() != ISD::UNDEF) { + if (LD->isIndexed() && !LD->getOffset().isUndef()) { assert(LD->getAddressingMode() == ISD::PRE_INC && "Non-pre-inc AM on PPC?"); RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr, @@ -6289,12 +6512,36 @@ void PPCTargetLowering::spliceIntoChain(SDValue ResChain, DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); } +/// \brief Analyze profitability of direct move +/// prefer float load to int load plus direct move +/// when there is no integer use of int load +static bool directMoveIsProfitable(const SDValue &Op) { + SDNode *Origin = Op.getOperand(0).getNode(); + if (Origin->getOpcode() != ISD::LOAD) + return true; + + for (SDNode::use_iterator UI = Origin->use_begin(), + UE = Origin->use_end(); + UI != UE; ++UI) { + + // Only look at the users of the loaded value. + if (UI.getUse().get().getResNo() != 0) + continue; + + if (UI->getOpcode() != ISD::SINT_TO_FP && + UI->getOpcode() != ISD::UINT_TO_FP) + return true; + } + + return false; +} + /// \brief Custom lowers integer to floating point conversions to use /// the direct move instructions available in ISA 2.07 to avoid the /// need for load/store combinations. 
SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG, - SDLoc dl) const { + const SDLoc &dl) const { assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) && "Invalid floating point type as target of conversion"); @@ -6335,9 +6582,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); - SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64); - FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, FPHalfs, FPHalfs, - FPHalfs, FPHalfs); + SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); @@ -6359,7 +6604,8 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // If we have direct moves, we can do all the conversion, skip the store/load // however, without FPCVT we can't do most conversions. - if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT()) + if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) && + Subtarget.isPPC64() && Subtarget.hasFPCVT()) return LowerINT_TO_FPDirectMove(Op, DAG, dl); assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && @@ -6429,9 +6675,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) { - Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false, - false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo, - RLI.Ranges); + Bits = + DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, RLI.Alignment, + RLI.IsInvariant ? MachineMemOperand::MOInvariant + : MachineMemOperand::MONone, + RLI.AAInfo, RLI.Ranges); spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); } else if (Subtarget.hasLFIWAX() && canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) { @@ -6459,15 +6707,15 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SINT.getOpcode() == ISD::ZERO_EXTEND)) && SINT.getOperand(0).getValueType() == MVT::i32) { MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Store = DAG.getStore( - DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), - false, false, 0); + SDValue Store = + DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FrameIdx)); assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && "Expected an i32 store"); @@ -6505,7 +6753,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // then lfd it and fcfid it. 
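The QPX comment above relies on the identity (V + 1.0) * 0.5 == 0.5*V + 0.5 to turn a {-1.0, +1.0} lane into {0.0, 1.0} with a single fused multiply-add. A one-line check of the arithmetic:

    #include <cmath>

    double qpxLaneToInt(double V) { return std::fma(V, 0.5, 0.5); }
    // qpxLaneToInt(-1.0) == 0.0 and qpxLaneToInt(+1.0) == 1.0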
MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); + EVT PtrVT = getPointerTy(MF.getDataLayout()); SDValue Ld; if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { @@ -6516,10 +6764,10 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Store = DAG.getStore( - DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), - false, false, 0); + SDValue Store = + DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FrameIdx)); assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && "Expected an i32 store"); @@ -6554,14 +6802,12 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // STD the extended value into the stack slot. SDValue Store = DAG.getStore( DAG.getEntryNode(), dl, Ext64, FIdx, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), - false, false, 0); + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); // Load the value as a double. Ld = DAG.getLoad( MVT::f64, dl, Store, FIdx, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), - false, false, false, 0); + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); } // FCFID it and return it. @@ -6596,7 +6842,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); EVT VT = Op.getValueType(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); + EVT PtrVT = getPointerTy(MF.getDataLayout()); // Save FP Control Word to register EVT NodeTys[] = { @@ -6608,14 +6854,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, // Save FP register to stack slot int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, - StackSlot, MachinePointerInfo(), false, false,0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot, + MachinePointerInfo()); // Load FP Control Word from low 32 bits of stack slot. SDValue Four = DAG.getConstant(4, dl, PtrVT); SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); - SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(), - false, false, false, 0); + SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo()); // Transform as necessary SDValue CWD1 = @@ -6730,7 +6975,7 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { /// BuildSplatI - Build a canonical splati of Val with an element size of /// SplatSize. Cast the result to VT. static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, - SelectionDAG &DAG, SDLoc dl) { + SelectionDAG &DAG, const SDLoc &dl) { assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); static const MVT VTys[] = { // canonical VT to use for each size. @@ -6746,18 +6991,13 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, EVT CanonicalVT = VTys[SplatSize-1]; // Build a canonical splat for this value. 
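The assert above reflects the encoding of vspltisb/vspltish/vspltisw, whose immediate is a 5-bit signed field; presumably the VTys table maps splat sizes 1/2/4 to v16i8/v8i16/v4i32 (the entries are elided in this hunk). The legality check as a sketch:

    // vsplti* immediates are 5-bit signed: -16..15.
    bool isValidSplatImm(int Val) { return Val >= -16 && Val <= 15; }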
- SDValue Elt = DAG.getConstant(Val, dl, MVT::i32); - SmallVector<SDValue, 8> Ops; - Ops.assign(CanonicalVT.getVectorNumElements(), Elt); - SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops); - return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res); + return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT)); } /// BuildIntrinsicOp - Return a unary operator intrinsic node with the /// specified intrinsic ID. -static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, - SelectionDAG &DAG, SDLoc dl, - EVT DestVT = MVT::Other) { +static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, + const SDLoc &dl, EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = Op.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, DAG.getConstant(IID, dl, MVT::i32), Op); @@ -6766,7 +7006,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, /// BuildIntrinsicOp - Return a binary operator intrinsic node with the /// specified intrinsic ID. static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, - SelectionDAG &DAG, SDLoc dl, + SelectionDAG &DAG, const SDLoc &dl, EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = LHS.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, @@ -6776,8 +7016,8 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the /// specified intrinsic ID. static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, - SDValue Op2, SelectionDAG &DAG, - SDLoc dl, EVT DestVT = MVT::Other) { + SDValue Op2, SelectionDAG &DAG, const SDLoc &dl, + EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = Op0.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2); @@ -6785,8 +7025,8 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified /// amount. The result has the specified value type. -static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, - EVT VT, SelectionDAG &DAG, SDLoc dl) { +static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, + SelectionDAG &DAG, const SDLoc &dl) { // Force LHS/RHS to be the right type. 
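BuildVSLDOI above models the vsldoi instruction: the two inputs are concatenated and 16 bytes are taken starting at byte Amt. A scalar sketch of those semantics, with byte order as in the big-endian encoding and Amt assumed to be less than 16:

    #include <cstdint>

    void vsldoi16(const std::uint8_t L[16], const std::uint8_t R[16],
                  unsigned Amt, std::uint8_t Out[16]) {
      for (unsigned i = 0; i != 16; ++i)
        Out[i] = (i + Amt < 16) ? L[i + Amt] : R[i + Amt - 16];
    }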
LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); @@ -6825,7 +7065,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, bool IsConst = true; for (unsigned i = 0; i < 4; ++i) { - if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (BVN->getOperand(i).isUndef()) continue; if (!isa<ConstantSDNode>(BVN->getOperand(i))) { IsConst = false; break; @@ -6838,12 +7078,12 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, Constant *NegOne = ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0); - SmallVector<Constant*, 4> CV(4, NegOne); + Constant *CV[4]; for (unsigned i = 0; i < 4; ++i) { - if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) + if (BVN->getOperand(i).isUndef()) CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext())); else if (isNullConstant(BVN->getOperand(i))) - continue; + CV[i] = NegOne; else CV[i] = One; } @@ -6852,15 +7092,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), 16 /* alignment */); - SmallVector<SDValue, 2> Ops; - Ops.push_back(DAG.getEntryNode()); - Ops.push_back(CPIdx); - - SmallVector<EVT, 2> ValueVTs; - ValueVTs.push_back(MVT::v4i1); - ValueVTs.push_back(MVT::Other); // chain - SDVTList VTs = DAG.getVTList(ValueVTs); - + SDValue Ops[] = {DAG.getEntryNode(), CPIdx}; + SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other}); return DAG.getMemIntrinsicNode( PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); @@ -6868,7 +7101,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SmallVector<SDValue, 4> Stores; for (unsigned i = 0; i < 4; ++i) { - if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (BVN->getOperand(i).isUndef()) continue; unsigned Offset = 4*i; SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); @@ -6876,19 +7109,16 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize(); if (StoreSize > 4) { - Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, - BVN->getOperand(i), Idx, - PtrInfo.getWithOffset(Offset), - MVT::i32, false, false, 0)); + Stores.push_back( + DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx, + PtrInfo.getWithOffset(Offset), MVT::i32)); } else { SDValue StoreValue = BVN->getOperand(i); if (StoreSize < 4) StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue); - Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, - StoreValue, Idx, - PtrInfo.getWithOffset(Offset), - false, false, 0)); + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx, + PtrInfo.getWithOffset(Offset))); } } @@ -6903,15 +7133,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // is typed as v4f64 because the QPX register integer states are not // explicitly represented. 
- SmallVector<SDValue, 2> Ops; - Ops.push_back(StoreChain); - Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32)); - Ops.push_back(FIdx); - - SmallVector<EVT, 2> ValueVTs; - ValueVTs.push_back(MVT::v4f64); - ValueVTs.push_back(MVT::Other); // chain - SDVTList VTs = DAG.getVTList(ValueVTs); + SDValue Ops[] = {StoreChain, + DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32), + FIdx}; + SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other}); SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, VTs, Ops, MVT::v4i32, PtrInfo); @@ -6919,9 +7144,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32), LoadedVect); - SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::f64); - FPZeros = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, - FPZeros, FPZeros, FPZeros, FPZeros); + SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64); return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ); } @@ -6949,8 +7172,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, if (SplatBits == 0) { // Canonicalize all zero vectors to be v4i32. if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { - SDValue Z = DAG.getConstant(0, dl, MVT::i32); - Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z); + SDValue Z = DAG.getConstant(0, dl, MVT::v4i32); Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); } return Op; @@ -7089,7 +7311,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, /// the specified operations to build the shuffle. static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, - SDLoc dl) { + const SDLoc &dl) { unsigned OpNum = (PFEntry >> 26) & 0x0F; unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); @@ -7175,11 +7397,50 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, EVT VT = Op.getValueType(); bool isLittleEndian = Subtarget.isLittleEndian(); + unsigned ShiftElts, InsertAtByte; + bool Swap; + if (Subtarget.hasP9Vector() && + PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap, + isLittleEndian)) { + if (Swap) + std::swap(V1, V2); + SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2); + if (ShiftElts) { + SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2, + DAG.getConstant(ShiftElts, dl, MVT::i32)); + SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl, + DAG.getConstant(InsertAtByte, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); + } + SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2, + DAG.getConstant(InsertAtByte, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); + } + + if (Subtarget.hasVSX()) { + if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { + int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, + DAG.getConstant(SplatIdx, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat); + } + + // Left shifts of 8 bytes are actually swaps. Convert accordingly. 
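GeneratePerfectShuffle above unpacks a 32-bit perfect-shuffle table entry with shifts and masks; the implied layout, restated as a small decoder (sketch):

    // PFEntry: bits 26..29 = OpNum, bits 13..25 = LHSID, bits 0..12 = RHSID.
    struct PFEntryFields { unsigned OpNum, LHSID, RHSID; };

    PFEntryFields decodePFEntry(unsigned PFEntry) {
      return {(PFEntry >> 26) & 0x0Fu,
              (PFEntry >> 13) & ((1u << 13) - 1),
              PFEntry & ((1u << 13) - 1)};
    }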
+ if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1); + SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap); + } + + } + if (Subtarget.hasQPX()) { if (VT.getVectorNumElements() != 4) return SDValue(); - if (V2.getOpcode() == ISD::UNDEF) V2 = V1; + if (V2.isUndef()) V2 = V1; int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp); if (AlignIdx != -1) { @@ -7192,9 +7453,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SplatIdx -= 4; } - // FIXME: If SplatIdx == 0 and the input came from a load, then there is - // nothing to do. - return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1, DAG.getConstant(SplatIdx, dl, MVT::i32)); } @@ -7217,7 +7475,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // Cases that are handled by instructions that take permute immediates // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be // selected by the instruction selector. - if (V2.getOpcode() == ISD::UNDEF) { + if (V2.isUndef()) { if (PPC::isSplatShuffleMask(SVOp, 1) || PPC::isSplatShuffleMask(SVOp, 2) || PPC::isSplatShuffleMask(SVOp, 4) || @@ -7315,7 +7573,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant // vector that will get spilled to the constant pool. - if (V2.getOpcode() == ISD::UNDEF) V2 = V1; + if (V2.isUndef()) V2 = V1; // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except // that it is in input element units, not in bytes. Convert now. @@ -7340,8 +7598,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, MVT::i32)); } - SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, - ResultMask); + SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask); if (isLittleEndian) return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V2, V1, VPermMask); @@ -7468,6 +7725,16 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, /// lower, do it, otherwise return null. SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { + unsigned IntrinsicID = + cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + + if (IntrinsicID == Intrinsic::thread_pointer) { + // Reads the thread pointer register, used for __builtin_thread_pointer. + bool is64bit = Subtarget.isPPC64(); + return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, + is64bit ? MVT::i64 : MVT::i32); + } + // If this is a lowered altivec predicate compare, CompareOpc is set to the // opcode number of the comparison. SDLoc dl(Op); @@ -7566,12 +7833,10 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); // Store the input value into Value#0 of the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, - Op.getOperand(0), FIdx, MachinePointerInfo(), - false, false, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, + MachinePointerInfo()); // Load it out. 
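The Intrinsic::thread_pointer case above simply materializes the ABI's thread-pointer register. Restated (sketch):

    // __builtin_thread_pointer: r13 (PPC::X13) on 64-bit PowerPC,
    // r2 (PPC::R2) on 32-bit.
    const char *threadPointerRegister(bool Is64Bit) {
      return Is64Bit ? "r13" : "r2";
    }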
- return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(), - false, false, false, 0); + return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo()); } SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, @@ -7594,9 +7859,7 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to // understand how to form the extending load. - SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64); - FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, - FPHalfs, FPHalfs, FPHalfs, FPHalfs); + SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); @@ -7613,15 +7876,10 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue StoreChain = DAG.getEntryNode(); - SmallVector<SDValue, 2> Ops; - Ops.push_back(StoreChain); - Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32)); - Ops.push_back(Value); - Ops.push_back(FIdx); - - SmallVector<EVT, 2> ValueVTs; - ValueVTs.push_back(MVT::Other); // chain - SDVTList VTs = DAG.getVTList(ValueVTs); + SDValue Ops[] = {StoreChain, + DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), + Value, FIdx}; + SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, VTs, Ops, MVT::v4i32, PtrInfo); @@ -7631,9 +7889,8 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); - SDValue IntVal = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, - PtrInfo.getWithOffset(Offset), - false, false, false, 0); + SDValue IntVal = + DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset)); if (!Subtarget.useCRBits()) return IntVal; @@ -7662,24 +7919,20 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, ScalarMemVT = MemVT.getScalarType(); unsigned Stride = ScalarMemVT.getStoreSize(); - SmallVector<SDValue, 8> Vals, LoadChains; + SDValue Vals[4], LoadChains[4]; for (unsigned Idx = 0; Idx < 4; ++Idx) { SDValue Load; if (ScalarVT != ScalarMemVT) - Load = - DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain, - BasePtr, - LN->getPointerInfo().getWithOffset(Idx*Stride), - ScalarMemVT, LN->isVolatile(), LN->isNonTemporal(), - LN->isInvariant(), MinAlign(Alignment, Idx*Stride), - LN->getAAInfo()); + Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain, + BasePtr, + LN->getPointerInfo().getWithOffset(Idx * Stride), + ScalarMemVT, MinAlign(Alignment, Idx * Stride), + LN->getMemOperand()->getFlags(), LN->getAAInfo()); else - Load = - DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr, - LN->getPointerInfo().getWithOffset(Idx*Stride), - LN->isVolatile(), LN->isNonTemporal(), - LN->isInvariant(), MinAlign(Alignment, Idx*Stride), - LN->getAAInfo()); + Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr, + LN->getPointerInfo().getWithOffset(Idx * Stride), + MinAlign(Alignment, Idx * Stride), + LN->getMemOperand()->getFlags(), LN->getAAInfo()); if (Idx == 0 && LN->isIndexed()) { assert(LN->getAddressingMode() == ISD::PRE_INC && @@ -7688,8 +7941,8 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, LN->getAddressingMode()); } - Vals.push_back(Load); - LoadChains.push_back(Load.getValue(1)); + Vals[Idx] = Load; + LoadChains[Idx] = Load.getValue(1); BasePtr = 
DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getConstant(Stride, dl, @@ -7697,8 +7950,7 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, } SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); - SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, - Op.getValueType(), Vals); + SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals); if (LN->isIndexed()) { SDValue RetOps[] = { Value, Vals[0].getValue(1), TF }; @@ -7715,23 +7967,20 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, // To lower v4i1 from a byte array, we load the byte elements of the // vector and then reuse the BUILD_VECTOR logic. - SmallVector<SDValue, 4> VectElmts, VectElmtChains; + SDValue VectElmts[4], VectElmtChains[4]; for (unsigned i = 0; i < 4; ++i) { SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); - VectElmts.push_back(DAG.getExtLoad(ISD::EXTLOAD, - dl, MVT::i32, LoadChain, Idx, - LN->getPointerInfo().getWithOffset(i), - MVT::i8 /* memory type */, - LN->isVolatile(), LN->isNonTemporal(), - LN->isInvariant(), - 1 /* alignment */, LN->getAAInfo())); - VectElmtChains.push_back(VectElmts[i].getValue(1)); + VectElmts[i] = DAG.getExtLoad( + ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx, + LN->getPointerInfo().getWithOffset(i), MVT::i8, + /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo()); + VectElmtChains[i] = VectElmts[i].getValue(1); } LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains); - SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i1, VectElmts); + SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts); SDValue RVals[] = { Value, LoadChain }; return DAG.getMergeValues(RVals, dl); @@ -7759,7 +8008,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, ScalarMemVT = MemVT.getScalarType(); unsigned Stride = ScalarMemVT.getStoreSize(); - SmallVector<SDValue, 8> Stores; + SDValue Stores[4]; for (unsigned Idx = 0; Idx < 4; ++Idx) { SDValue Ex = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, @@ -7767,16 +8016,15 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, SDValue Store; if (ScalarVT != ScalarMemVT) Store = - DAG.getTruncStore(StoreChain, dl, Ex, BasePtr, - SN->getPointerInfo().getWithOffset(Idx*Stride), - ScalarMemVT, SN->isVolatile(), SN->isNonTemporal(), - MinAlign(Alignment, Idx*Stride), SN->getAAInfo()); + DAG.getTruncStore(StoreChain, dl, Ex, BasePtr, + SN->getPointerInfo().getWithOffset(Idx * Stride), + ScalarMemVT, MinAlign(Alignment, Idx * Stride), + SN->getMemOperand()->getFlags(), SN->getAAInfo()); else - Store = - DAG.getStore(StoreChain, dl, Ex, BasePtr, - SN->getPointerInfo().getWithOffset(Idx*Stride), - SN->isVolatile(), SN->isNonTemporal(), - MinAlign(Alignment, Idx*Stride), SN->getAAInfo()); + Store = DAG.getStore(StoreChain, dl, Ex, BasePtr, + SN->getPointerInfo().getWithOffset(Idx * Stride), + MinAlign(Alignment, Idx * Stride), + SN->getMemOperand()->getFlags(), SN->getAAInfo()); if (Idx == 0 && SN->isIndexed()) { assert(SN->getAddressingMode() == ISD::PRE_INC && @@ -7788,7 +8036,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getConstant(Stride, dl, BasePtr.getValueType())); - Stores.push_back(Store); + Stores[Idx] = Store; } SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); @@ -7811,9 +8059,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, // FIXME: We can make 
this an f32 vector, but the BUILD_VECTOR code needs to // understand how to form the extending load. - SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64); - FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, - FPHalfs, FPHalfs, FPHalfs, FPHalfs); + SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); @@ -7829,43 +8075,37 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SmallVector<SDValue, 2> Ops; - Ops.push_back(StoreChain); - Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32)); - Ops.push_back(Value); - Ops.push_back(FIdx); - - SmallVector<EVT, 2> ValueVTs; - ValueVTs.push_back(MVT::Other); // chain - SDVTList VTs = DAG.getVTList(ValueVTs); + SDValue Ops[] = {StoreChain, + DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), + Value, FIdx}; + SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, VTs, Ops, MVT::v4i32, PtrInfo); // Move data into the byte array. - SmallVector<SDValue, 4> Loads, LoadChains; + SDValue Loads[4], LoadChains[4]; for (unsigned i = 0; i < 4; ++i) { unsigned Offset = 4*i; SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); - Loads.push_back(DAG.getLoad(MVT::i32, dl, StoreChain, Idx, - PtrInfo.getWithOffset(Offset), - false, false, false, 0)); - LoadChains.push_back(Loads[i].getValue(1)); + Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, + PtrInfo.getWithOffset(Offset)); + LoadChains[i] = Loads[i].getValue(1); } StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); - SmallVector<SDValue, 4> Stores; + SDValue Stores[4]; for (unsigned i = 0; i < 4; ++i) { SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); - Stores.push_back(DAG.getTruncStore( + Stores[i] = DAG.getTruncStore( StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i), - MVT::i8 /* memory type */, SN->isNonTemporal(), SN->isVolatile(), - 1 /* alignment */, SN->getAAInfo())); + MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(), + SN->getAAInfo()); } StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); @@ -7958,18 +8198,22 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::VASTART: - return LowerVASTART(Op, DAG, Subtarget); + return LowerVASTART(Op, DAG); case ISD::VAARG: - return LowerVAARG(Op, DAG, Subtarget); + return LowerVAARG(Op, DAG); case ISD::VACOPY: - return LowerVACOPY(Op, DAG, Subtarget); + return LowerVACOPY(Op, DAG); + + case ISD::STACKRESTORE: + return LowerSTACKRESTORE(Op, DAG); - case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget); case ISD::DYNAMIC_STACKALLOC: - return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget); - case ISD::GET_DYNAMIC_AREA_OFFSET: return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG, Subtarget); + return LowerDYNAMIC_STACKALLOC(Op, DAG); + + case ISD::GET_DYNAMIC_AREA_OFFSET: + return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); @@ -8048,7 +8292,7 @@ void 
PPCTargetLowering::ReplaceNodeResults(SDNode *N, EVT VT = N->getValueType(0); if (VT == MVT::i64) { - SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget); + SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG); Results.push_back(NewNode); Results.push_back(NewNode.getValue(1)); @@ -8099,9 +8343,9 @@ static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { - if (Ord == SequentiallyConsistent) + if (Ord == AtomicOrdering::SequentiallyConsistent) return callIntrinsic(Builder, Intrinsic::ppc_sync); - if (isAtLeastRelease(Ord)) + if (isReleaseOrStronger(Ord)) return callIntrinsic(Builder, Intrinsic::ppc_lwsync); return nullptr; } @@ -8109,7 +8353,7 @@ Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { - if (IsLoad && isAtLeastAcquire(Ord)) + if (IsLoad && isAcquireOrStronger(Ord)) return callIntrinsic(Builder, Intrinsic::ppc_lwsync); // FIXME: this is too conservative, a dependent branch + isync is enough. // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and @@ -8119,7 +8363,7 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, } MachineBasicBlock * -PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, +PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, unsigned AtomicSize, unsigned BinOpcode) const { // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. @@ -8154,11 +8398,11 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, MachineFunction *F = BB->getParent(); MachineFunction::iterator It = ++BB->getIterator(); - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptrA = MI->getOperand(1).getReg(); - unsigned ptrB = MI->getOperand(2).getReg(); - unsigned incr = MI->getOperand(3).getReg(); - DebugLoc dl = MI->getDebugLoc(); + unsigned dest = MI.getOperand(0).getReg(); + unsigned ptrA = MI.getOperand(1).getReg(); + unsigned ptrB = MI.getOperand(2).getReg(); + unsigned incr = MI.getOperand(3).getReg(); + DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -8203,9 +8447,9 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, } MachineBasicBlock * -PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, +PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, - bool is8bit, // operation + bool is8bit, // operation unsigned BinOpcode) const { // If we support part-word atomic mnemonics, just use them if (Subtarget.hasPartwordAtomics()) @@ -8224,11 +8468,11 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, MachineFunction *F = BB->getParent(); MachineFunction::iterator It = ++BB->getIterator(); - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptrA = MI->getOperand(1).getReg(); - unsigned ptrB = MI->getOperand(2).getReg(); - unsigned incr = MI->getOperand(3).getReg(); - DebugLoc dl = MI->getDebugLoc(); + unsigned dest = MI.getOperand(0).getReg(); + unsigned ptrA = MI.getOperand(1).getReg(); + unsigned ptrB = MI.getOperand(2).getReg(); + unsigned incr = MI.getOperand(3).getReg(); + DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 
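The emitLeadingFence/emitTrailingFence hunks above also pick up release_39's change of AtomicOrdering to a scoped enum with named strength predicates (isReleaseOrStronger, isAcquireOrStronger). A minimal sketch of the selection logic under the new API; pickPPCFence is an illustrative helper, not something in this commit:

    // Sketch only: assumes the LLVM 3.9 headers listed below.
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/Support/AtomicOrdering.h"

    using namespace llvm;

    static Intrinsic::ID pickPPCFence(AtomicOrdering Ord, bool Leading) {
      if (Leading) {
        // Full sync before a seq_cst access; lwsync for release or stronger.
        if (Ord == AtomicOrdering::SequentiallyConsistent)
          return Intrinsic::ppc_sync;
        if (isReleaseOrStronger(Ord))
          return Intrinsic::ppc_lwsync;
      } else if (isAcquireOrStronger(Ord)) {
        // Trailing fence after a load; conservative, per the FIXME above.
        return Intrinsic::ppc_lwsync;
      }
      return Intrinsic::not_intrinsic; // no fence required
    }

The partword atomic expansion then continues: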
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -8334,10 +8578,10 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, return BB; } -llvm::MachineBasicBlock* -PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, +llvm::MachineBasicBlock * +PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { - DebugLoc DL = MI->getDebugLoc(); + DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineFunction *MF = MBB->getParent(); @@ -8347,10 +8591,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, MachineFunction::iterator I = ++MBB->getIterator(); // Memory Reference - MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); - MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); - unsigned DstReg = MI->getOperand(0).getReg(); + unsigned DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); assert(RC->hasType(MVT::i32) && "Invalid destination!"); unsigned mainDstReg = MRI.createVirtualRegister(RC); @@ -8407,7 +8651,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, // Prepare IP either in reg. const TargetRegisterClass *PtrRC = getRegClassFor(PVT); unsigned LabelReg = MRI.createVirtualRegister(PtrRC); - unsigned BufReg = MI->getOperand(1).getReg(); + unsigned BufReg = MI.getOperand(1).getReg(); if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { setUsesTOCBasePtr(*MBB->getParent()); @@ -8477,22 +8721,22 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, .addReg(mainDstReg).addMBB(mainMBB) .addReg(restoreDstReg).addMBB(thisMBB); - MI->eraseFromParent(); + MI.eraseFromParent(); return sinkMBB; } MachineBasicBlock * -PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, +PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { - DebugLoc DL = MI->getDebugLoc(); + DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); // Memory Reference - MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); - MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && @@ -8507,10 +8751,8 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, unsigned BP = (PVT == MVT::i64) ? PPC::X30 - : (Subtarget.isSVR4ABI() && - MF->getTarget().getRelocationModel() == Reloc::PIC_ - ? PPC::R29 - : PPC::R30); + : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29 + : PPC::R30); MachineInstrBuilder MIB; @@ -8519,7 +8761,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, const int64_t TOCOffset = 3 * PVT.getStoreSize(); const int64_t BPOffset = 4 * PVT.getStoreSize(); - unsigned BufReg = MI->getOperand(0).getReg(); + unsigned BufReg = MI.getOperand(0).getReg(); // Reload FP (the jumped-to function may not have had a // frame pointer, and if so, then its r31 will be restored @@ -8586,34 +8828,34 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? 
PPC::BCTR8 : PPC::BCTR)); - MI->eraseFromParent(); + MI.eraseFromParent(); return MBB; } MachineBasicBlock * -PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, +PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { - if (MI->getOpcode() == TargetOpcode::STACKMAP || - MI->getOpcode() == TargetOpcode::PATCHPOINT) { + if (MI.getOpcode() == TargetOpcode::STACKMAP || + MI.getOpcode() == TargetOpcode::PATCHPOINT) { if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() && - MI->getOpcode() == TargetOpcode::PATCHPOINT) { + MI.getOpcode() == TargetOpcode::PATCHPOINT) { // Call lowering should have added an r2 operand to indicate a dependence // on the TOC base pointer value. It can't however, because there is no // way to mark the dependence as implicit there, and so the stackmap code // will confuse it with a regular operand. Instead, add the dependence // here. setUsesTOCBasePtr(*BB->getParent()); - MI->addOperand(MachineOperand::CreateReg(PPC::X2, false, true)); + MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true)); } return emitPatchPoint(MI, BB); } - if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 || - MI->getOpcode() == PPC::EH_SjLj_SetJmp64) { + if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 || + MI.getOpcode() == PPC::EH_SjLj_SetJmp64) { return emitEHSjLjSetJmp(MI, BB); - } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 || - MI->getOpcode() == PPC::EH_SjLj_LongJmp64) { + } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 || + MI.getOpcode() == PPC::EH_SjLj_LongJmp64) { return emitEHSjLjLongJmp(MI, BB); } @@ -8626,44 +8868,43 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); - if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || - MI->getOpcode() == PPC::SELECT_CC_I8 || - MI->getOpcode() == PPC::SELECT_I4 || - MI->getOpcode() == PPC::SELECT_I8)) { + if (Subtarget.hasISEL() && + (MI.getOpcode() == PPC::SELECT_CC_I4 || + MI.getOpcode() == PPC::SELECT_CC_I8 || + MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) { SmallVector<MachineOperand, 2> Cond; - if (MI->getOpcode() == PPC::SELECT_CC_I4 || - MI->getOpcode() == PPC::SELECT_CC_I8) - Cond.push_back(MI->getOperand(4)); + if (MI.getOpcode() == PPC::SELECT_CC_I4 || + MI.getOpcode() == PPC::SELECT_CC_I8) + Cond.push_back(MI.getOperand(4)); else Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); - Cond.push_back(MI->getOperand(1)); - - DebugLoc dl = MI->getDebugLoc(); - TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), - Cond, MI->getOperand(2).getReg(), - MI->getOperand(3).getReg()); - } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || - MI->getOpcode() == PPC::SELECT_CC_I8 || - MI->getOpcode() == PPC::SELECT_CC_F4 || - MI->getOpcode() == PPC::SELECT_CC_F8 || - MI->getOpcode() == PPC::SELECT_CC_QFRC || - MI->getOpcode() == PPC::SELECT_CC_QSRC || - MI->getOpcode() == PPC::SELECT_CC_QBRC || - MI->getOpcode() == PPC::SELECT_CC_VRRC || - MI->getOpcode() == PPC::SELECT_CC_VSFRC || - MI->getOpcode() == PPC::SELECT_CC_VSSRC || - MI->getOpcode() == PPC::SELECT_CC_VSRC || - MI->getOpcode() == PPC::SELECT_I4 || - MI->getOpcode() == PPC::SELECT_I8 || - MI->getOpcode() == PPC::SELECT_F4 || - MI->getOpcode() == PPC::SELECT_F8 || - MI->getOpcode() == PPC::SELECT_QFRC || - MI->getOpcode() == PPC::SELECT_QSRC || - MI->getOpcode() == PPC::SELECT_QBRC || - MI->getOpcode() == PPC::SELECT_VRRC || - MI->getOpcode() == PPC::SELECT_VSFRC || - MI->getOpcode() == PPC::SELECT_VSSRC || - 
MI->getOpcode() == PPC::SELECT_VSRC) { + Cond.push_back(MI.getOperand(1)); + + DebugLoc dl = MI.getDebugLoc(); + TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond, + MI.getOperand(2).getReg(), MI.getOperand(3).getReg()); + } else if (MI.getOpcode() == PPC::SELECT_CC_I4 || + MI.getOpcode() == PPC::SELECT_CC_I8 || + MI.getOpcode() == PPC::SELECT_CC_F4 || + MI.getOpcode() == PPC::SELECT_CC_F8 || + MI.getOpcode() == PPC::SELECT_CC_QFRC || + MI.getOpcode() == PPC::SELECT_CC_QSRC || + MI.getOpcode() == PPC::SELECT_CC_QBRC || + MI.getOpcode() == PPC::SELECT_CC_VRRC || + MI.getOpcode() == PPC::SELECT_CC_VSFRC || + MI.getOpcode() == PPC::SELECT_CC_VSSRC || + MI.getOpcode() == PPC::SELECT_CC_VSRC || + MI.getOpcode() == PPC::SELECT_I4 || + MI.getOpcode() == PPC::SELECT_I8 || + MI.getOpcode() == PPC::SELECT_F4 || + MI.getOpcode() == PPC::SELECT_F8 || + MI.getOpcode() == PPC::SELECT_QFRC || + MI.getOpcode() == PPC::SELECT_QSRC || + MI.getOpcode() == PPC::SELECT_QBRC || + MI.getOpcode() == PPC::SELECT_VRRC || + MI.getOpcode() == PPC::SELECT_VSFRC || + MI.getOpcode() == PPC::SELECT_VSSRC || + MI.getOpcode() == PPC::SELECT_VSRC) { // The incoming instruction knows the destination vreg to set, the // condition code register to branch on, the true/false values to // select between, and a branch opcode to use. @@ -8677,7 +8918,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *thisMBB = BB; MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - DebugLoc dl = MI->getDebugLoc(); + DebugLoc dl = MI.getDebugLoc(); F->insert(It, copy0MBB); F->insert(It, sinkMBB); @@ -8690,23 +8931,24 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); - if (MI->getOpcode() == PPC::SELECT_I4 || - MI->getOpcode() == PPC::SELECT_I8 || - MI->getOpcode() == PPC::SELECT_F4 || - MI->getOpcode() == PPC::SELECT_F8 || - MI->getOpcode() == PPC::SELECT_QFRC || - MI->getOpcode() == PPC::SELECT_QSRC || - MI->getOpcode() == PPC::SELECT_QBRC || - MI->getOpcode() == PPC::SELECT_VRRC || - MI->getOpcode() == PPC::SELECT_VSFRC || - MI->getOpcode() == PPC::SELECT_VSSRC || - MI->getOpcode() == PPC::SELECT_VSRC) { + if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 || + MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || + MI.getOpcode() == PPC::SELECT_QFRC || + MI.getOpcode() == PPC::SELECT_QSRC || + MI.getOpcode() == PPC::SELECT_QBRC || + MI.getOpcode() == PPC::SELECT_VRRC || + MI.getOpcode() == PPC::SELECT_VSFRC || + MI.getOpcode() == PPC::SELECT_VSSRC || + MI.getOpcode() == PPC::SELECT_VSRC) { BuildMI(BB, dl, TII->get(PPC::BC)) - .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + .addReg(MI.getOperand(1).getReg()) + .addMBB(sinkMBB); } else { - unsigned SelectPred = MI->getOperand(4).getImm(); + unsigned SelectPred = MI.getOperand(4).getImm(); BuildMI(BB, dl, TII->get(PPC::BCC)) - .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + .addImm(SelectPred) + .addReg(MI.getOperand(1).getReg()) + .addMBB(sinkMBB); } // copy0MBB: @@ -8721,11 +8963,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... 
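The SELECT_* pseudo expansion sketched in those comments is the standard two-successor diamond. A condensed version of the same shape, for orientation; emitSelectDiamond and the BranchOpc parameter are illustrative, and the operand layout (dst, cond, true, false) follows the pseudo above:

    // Sketch of the diamond a custom inserter builds (LLVM 3.9 APIs assumed).
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    #include "llvm/Target/TargetInstrInfo.h"
    #include "llvm/Target/TargetOpcodes.h"

    static llvm::MachineBasicBlock *
    emitSelectDiamond(llvm::MachineInstr &MI, llvm::MachineBasicBlock *BB,
                      const llvm::TargetInstrInfo *TII, unsigned BranchOpc) {
      using namespace llvm;
      MachineFunction *F = BB->getParent();
      const BasicBlock *LLVM_BB = BB->getBasicBlock();
      MachineBasicBlock *Copy0 = F->CreateMachineBasicBlock(LLVM_BB);
      MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
      MachineFunction::iterator It = ++BB->getIterator();
      F->insert(It, Copy0);
      F->insert(It, Sink);
      // Everything after MI moves to the sink, which inherits BB's successors.
      Sink->splice(Sink->begin(), BB,
                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
      Sink->transferSuccessorsAndUpdatePHIs(BB);
      BB->addSuccessor(Copy0); // fallthrough: select the false value
      BB->addSuccessor(Sink);  // branch taken: select the true value
      Copy0->addSuccessor(Sink);
      DebugLoc dl = MI.getDebugLoc();
      // thisMBB: conditional branch straight to the sink block.
      BuildMI(BB, dl, TII->get(BranchOpc))
          .addReg(MI.getOperand(1).getReg())
          .addMBB(Sink);
      // sinkMBB: %dst = PHI [false, Copy0], [true, thisMBB].
      BuildMI(*Sink, Sink->begin(), dl, TII->get(TargetOpcode::PHI),
              MI.getOperand(0).getReg())
          .addReg(MI.getOperand(3).getReg()).addMBB(Copy0)
          .addReg(MI.getOperand(2).getReg()).addMBB(BB);
      MI.eraseFromParent();
      return Sink;
    }

In the diff, the inserter then finishes in the sink block: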
BB = sinkMBB; - BuildMI(*BB, BB->begin(), dl, - TII->get(PPC::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) - .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); - } else if (MI->getOpcode() == PPC::ReadTB) { + BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg()) + .addReg(MI.getOperand(3).getReg()) + .addMBB(copy0MBB) + .addReg(MI.getOperand(2).getReg()) + .addMBB(thisMBB); + } else if (MI.getOpcode() == PPC::ReadTB) { // To read the 64-bit time-base register on a 32-bit target, we read the // two halves. Should the counter have wrapped while it was being read, we // need to try again. @@ -8740,7 +8983,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - DebugLoc dl = MI->getDebugLoc(); + DebugLoc dl = MI.getDebugLoc(); F->insert(It, readMBB); F->insert(It, sinkMBB); @@ -8754,8 +8997,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); - unsigned LoReg = MI->getOperand(0).getReg(); - unsigned HiReg = MI->getOperand(1).getReg(); + unsigned LoReg = MI.getOperand(0).getReg(); + unsigned HiReg = MI.getOperand(1).getReg(); BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); @@ -8770,81 +9013,80 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB->addSuccessor(readMBB); BB->addSuccessor(sinkMBB); - } - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) + } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::AND); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::OR); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64) BB = 
EmitAtomicBinary(MI, BB, 8, PPC::OR8); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF); - else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); - else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8) + else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0); - else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16) + else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0); - else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32) + else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32) BB = EmitAtomicBinary(MI, BB, 4, 0); - else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64) + else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64) BB = EmitAtomicBinary(MI, BB, 8, 0); - else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || - MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || + else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || + MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || (Subtarget.hasPartwordAtomics() && - MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) || + MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) || (Subtarget.hasPartwordAtomics() && - MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) { - bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; + MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) { + bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; auto LoadMnemonic = PPC::LDARX; auto StoreMnemonic = PPC::STDCX; - switch(MI->getOpcode()) { + switch (MI.getOpcode()) { default: llvm_unreachable("Compare and swap of unknown size"); case PPC::ATOMIC_CMP_SWAP_I8: @@ -8866,12 +9108,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, StoreMnemonic = PPC::STDCX; break; } - unsigned dest = MI->getOperand(0).getReg(); - 
unsigned ptrA = MI->getOperand(1).getReg(); - unsigned ptrB = MI->getOperand(2).getReg(); - unsigned oldval = MI->getOperand(3).getReg(); - unsigned newval = MI->getOperand(4).getReg(); - DebugLoc dl = MI->getDebugLoc(); + unsigned dest = MI.getOperand(0).getReg(); + unsigned ptrA = MI.getOperand(1).getReg(); + unsigned ptrB = MI.getOperand(2).getReg(); + unsigned oldval = MI.getOperand(3).getReg(); + unsigned newval = MI.getOperand(4).getReg(); + DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -8928,20 +9170,20 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // exitMBB: // ... BB = exitMBB; - } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || - MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { + } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || + MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { // We must use 64-bit registers for addresses when targeting 64-bit, // since we're actually doing arithmetic on them. Other registers // can be 32-bit. bool is64bit = Subtarget.isPPC64(); - bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; + bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptrA = MI->getOperand(1).getReg(); - unsigned ptrB = MI->getOperand(2).getReg(); - unsigned oldval = MI->getOperand(3).getReg(); - unsigned newval = MI->getOperand(4).getReg(); - DebugLoc dl = MI->getDebugLoc(); + unsigned dest = MI.getOperand(0).getReg(); + unsigned ptrA = MI.getOperand(1).getReg(); + unsigned ptrB = MI.getOperand(2).getReg(); + unsigned oldval = MI.getOperand(3).getReg(); + unsigned newval = MI.getOperand(4).getReg(); + DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -9076,14 +9318,14 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB = exitMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) .addReg(ShiftReg); - } else if (MI->getOpcode() == PPC::FADDrtz) { + } else if (MI.getOpcode() == PPC::FADDrtz) { // This pseudo performs an FADD with rounding mode temporarily forced // to round-to-zero. We emit this via custom inserter since the FPSCR // is not modeled at the SelectionDAG level. - unsigned Dest = MI->getOperand(0).getReg(); - unsigned Src1 = MI->getOperand(1).getReg(); - unsigned Src2 = MI->getOperand(2).getReg(); - DebugLoc dl = MI->getDebugLoc(); + unsigned Dest = MI.getOperand(0).getReg(); + unsigned Src1 = MI.getOperand(1).getReg(); + unsigned Src2 = MI.getOperand(2).getReg(); + DebugLoc dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); @@ -9100,29 +9342,31 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Restore FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); - } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || - MI->getOpcode() == PPC::ANDIo_1_GT_BIT || - MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || - MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) { - unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || - MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ? 
- PPC::ANDIo8 : PPC::ANDIo; - bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || - MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8); + } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || + MI.getOpcode() == PPC::ANDIo_1_GT_BIT || + MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || + MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) { + unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || + MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) + ? PPC::ANDIo8 + : PPC::ANDIo; + bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || + MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass); - DebugLoc dl = MI->getDebugLoc(); + DebugLoc dl = MI.getDebugLoc(); BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) - .addReg(MI->getOperand(1).getReg()).addImm(1); + .addReg(MI.getOperand(1).getReg()) + .addImm(1); BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), - MI->getOperand(0).getReg()) - .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); - } else if (MI->getOpcode() == PPC::TCHECK_RET) { - DebugLoc Dl = MI->getDebugLoc(); + MI.getOperand(0).getReg()) + .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); + } else if (MI.getOpcode() == PPC::TCHECK_RET) { + DebugLoc Dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); @@ -9131,7 +9375,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, llvm_unreachable("Unexpected instr type to insert"); } - MI->eraseFromParent(); // The pseudo instruction is gone now. + MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -9650,14 +9894,18 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); } + std::list<HandleSDNode> PromOpHandles; + for (auto &PromOp : PromOps) + PromOpHandles.emplace_back(PromOp); + // Replace all operations (these are all the same, but have a different // (i1) return type). DAG.getNode will validate that the types of // a binary operator match, so go through the list in reverse so that // we've likely promoted both operands first. Any intermediate truncations or // extensions disappear. - while (!PromOps.empty()) { - SDValue PromOp = PromOps.back(); - PromOps.pop_back(); + while (!PromOpHandles.empty()) { + SDValue PromOp = PromOpHandles.back().getValue(); + PromOpHandles.pop_back(); if (PromOp.getOpcode() == ISD::TRUNCATE || PromOp.getOpcode() == ISD::SIGN_EXTEND || @@ -9666,7 +9914,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, if (!isa<ConstantSDNode>(PromOp.getOperand(0)) && PromOp.getOperand(0).getValueType() != MVT::i1) { // The operand is not yet ready (see comment below). - PromOps.insert(PromOps.begin(), PromOp); + PromOpHandles.emplace_front(PromOp); continue; } @@ -9693,7 +9941,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, // promoted (this should be rare because we're going through the // list backward, but if one of the operands has several users in // this cluster of to-be-promoted nodes, it is possible). 
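The switch from a plain PromOps vector to std::list<HandleSDNode> in this combine (and in DAGCombineExtBoolTrunc below) is the usual defense when a worklist must survive DAG mutation: a HandleSDNode registers a genuine use of the node, so if that node is CSE'd or replaced while other entries are rewritten, the handle tracks the replacement instead of dangling. A minimal sketch of the idiom; processWorklist is illustrative:

    // std::list because HandleSDNode is neither copyable nor movable.
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/SelectionDAGNodes.h"
    #include <list>

    static void processWorklist(llvm::SmallVectorImpl<llvm::SDValue> &Ops) {
      std::list<llvm::HandleSDNode> Handles;
      for (llvm::SDValue &Op : Ops)
        Handles.emplace_back(Op); // takes a use of Op's node
      while (!Handles.empty()) {
        // getValue() yields the *current* value, even if the original node
        // was replaced while earlier entries were being rewritten.
        llvm::SDValue V = Handles.back().getValue();
        Handles.pop_back();
        (void)V; // ... rewrite V; re-queue via emplace_front(V) if its
                 // operands are not ready yet, as the combine above does.
      }
    }

The re-queue path in the diff continues: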
- PromOps.insert(PromOps.begin(), PromOp); + PromOpHandles.emplace_front(PromOp); continue; } @@ -9900,13 +10148,17 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); } + std::list<HandleSDNode> PromOpHandles; + for (auto &PromOp : PromOps) + PromOpHandles.emplace_back(PromOp); + // Replace all operations (these are all the same, but have a different // (promoted) return type). DAG.getNode will validate that the types of // a binary operator match, so go through the list in reverse so that // we've likely promoted both operands first. - while (!PromOps.empty()) { - SDValue PromOp = PromOps.back(); - PromOps.pop_back(); + while (!PromOpHandles.empty()) { + SDValue PromOp = PromOpHandles.back().getValue(); + PromOpHandles.pop_back(); unsigned C; switch (PromOp.getOpcode()) { @@ -9923,7 +10175,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, // promoted (this should be rare because we're going through the // list backward, but if one of the operands has several users in // this cluster of to-be-promoted nodes, it is possible). - PromOps.insert(PromOps.begin(), PromOp); + PromOpHandles.emplace_front(PromOp); continue; } @@ -9935,7 +10187,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, PromOp.getOperand(0).getValueType() != N->getValueType(0)) || (SelectTruncOp[1].count(PromOp.getNode()) && PromOp.getOperand(1).getValueType() != N->getValueType(0))) { - PromOps.insert(PromOps.begin(), PromOp); + PromOpHandles.emplace_front(PromOp); continue; } } @@ -9997,6 +10249,59 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, ShiftCst); } +SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, + DAGCombinerInfo &DCI) const { + assert(N->getOpcode() == ISD::BUILD_VECTOR && + "Should be called with a BUILD_VECTOR node"); + + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(N); + if (N->getValueType(0) != MVT::v2f64 || !Subtarget.hasVSX()) + return SDValue(); + + // Looking for: + // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1)) + if (N->getOperand(0).getOpcode() != ISD::SINT_TO_FP && + N->getOperand(0).getOpcode() != ISD::UINT_TO_FP) + return SDValue(); + if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP && + N->getOperand(1).getOpcode() != ISD::UINT_TO_FP) + return SDValue(); + if (N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode()) + return SDValue(); + + SDValue Ext1 = N->getOperand(0).getOperand(0); + SDValue Ext2 = N->getOperand(1).getOperand(0); + if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); + + ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1)); + ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1)); + if (!Ext1Op || !Ext2Op) + return SDValue(); + if (Ext1.getValueType() != MVT::i32 || + Ext2.getValueType() != MVT::i32) + if (Ext1.getOperand(0) != Ext2.getOperand(0)) + return SDValue(); + + int FirstElem = Ext1Op->getZExtValue(); + int SecondElem = Ext2Op->getZExtValue(); + int SubvecIdx; + if (FirstElem == 0 && SecondElem == 1) + SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0; + else if (FirstElem == 2 && SecondElem == 3) + SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1; + else + return SDValue(); + + SDValue SrcVec = Ext1.getOperand(0); + auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ? 
+ PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP; + return DAG.getNode(NodeType, dl, MVT::v2f64, + SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl)); +} + SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const { assert((N->getOpcode() == ISD::SINT_TO_FP || @@ -10109,13 +10414,24 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, MVT VecTy = N->getValueType(0).getSimpleVT(); SDValue LoadOps[] = { Chain, Base }; SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, - DAG.getVTList(VecTy, MVT::Other), - LoadOps, VecTy, MMO); + DAG.getVTList(MVT::v2f64, MVT::Other), + LoadOps, MVT::v2f64, MMO); + DCI.AddToWorklist(Load.getNode()); Chain = Load.getValue(1); - SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, - DAG.getVTList(VecTy, MVT::Other), Chain, Load); + SDValue Swap = DAG.getNode( + PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load); DCI.AddToWorklist(Swap.getNode()); + + // Add a bitcast if the resulting load type doesn't match v2f64. + if (VecTy != MVT::v2f64) { + SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap); + DCI.AddToWorklist(N.getNode()); + // Package {bitcast value, swap's chain} to match Load's shape. + return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other), + N, Swap.getValue(1)); + } + return Swap; } @@ -10159,8 +10475,15 @@ SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, SDValue Src = N->getOperand(SrcOpnd); MVT VecTy = Src.getValueType().getSimpleVT(); + + // All stores are done as v2f64 and possible bit cast. + if (VecTy != MVT::v2f64) { + Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); + DCI.AddToWorklist(Src.getNode()); + } + SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, - DAG.getVTList(VecTy, MVT::Other), Chain, Src); + DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src); DCI.AddToWorklist(Swap.getNode()); Chain = Swap.getValue(1); SDValue StoreOps[] = { Chain, Swap, Base }; @@ -10277,6 +10600,111 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, return expandVSXLoadForLE(N, DCI); } + // We sometimes end up with a 64-bit integer load, from which we extract + // two single-precision floating-point numbers. This happens with + // std::complex<float>, and other similar structures, because of the way we + // canonicalize structure copies. However, if we lack direct moves, + // then the final bitcasts from the extracted integer values to the + // floating-point numbers turn into store/load pairs. Even with direct moves, + // just loading the two floating-point numbers is likely better. 
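As a concrete trigger for that comment: at -O2 a copy of an 8-byte aggregate such as std::complex<float> is commonly canonicalized into a single i64 load, after which the two f32 halves are recovered with srl/truncate/bitcast, exactly the shape matched below. A plain C++ illustration (whether the single i64 load actually forms depends on the frontend and earlier IR passes):

    #include <complex>

    // Copying *p is the aggregate copy that can become one 64-bit load;
    // reading .real()/.imag() afterwards yields the truncate/srl/bitcast
    // pattern the combine rewrites into two f32 loads.
    float sum_parts(const std::complex<float> *p) {
      std::complex<float> c = *p;
      return c.real() + c.imag();
    }

The lambda below implements the rewrite: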
+ auto ReplaceTwoFloatLoad = [&]() { + if (VT != MVT::i64) + return false; + + if (LD->getExtensionType() != ISD::NON_EXTLOAD || + LD->isVolatile()) + return false; + + // We're looking for a sequence like this: + // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64 + // t16: i64 = srl t13, Constant:i32<32> + // t17: i32 = truncate t16 + // t18: f32 = bitcast t17 + // t19: i32 = truncate t13 + // t20: f32 = bitcast t19 + + if (!LD->hasNUsesOfValue(2, 0)) + return false; + + auto UI = LD->use_begin(); + while (UI.getUse().getResNo() != 0) ++UI; + SDNode *Trunc = *UI++; + while (UI.getUse().getResNo() != 0) ++UI; + SDNode *RightShift = *UI; + if (Trunc->getOpcode() != ISD::TRUNCATE) + std::swap(Trunc, RightShift); + + if (Trunc->getOpcode() != ISD::TRUNCATE || + Trunc->getValueType(0) != MVT::i32 || + !Trunc->hasOneUse()) + return false; + if (RightShift->getOpcode() != ISD::SRL || + !isa<ConstantSDNode>(RightShift->getOperand(1)) || + RightShift->getConstantOperandVal(1) != 32 || + !RightShift->hasOneUse()) + return false; + + SDNode *Trunc2 = *RightShift->use_begin(); + if (Trunc2->getOpcode() != ISD::TRUNCATE || + Trunc2->getValueType(0) != MVT::i32 || + !Trunc2->hasOneUse()) + return false; + + SDNode *Bitcast = *Trunc->use_begin(); + SDNode *Bitcast2 = *Trunc2->use_begin(); + + if (Bitcast->getOpcode() != ISD::BITCAST || + Bitcast->getValueType(0) != MVT::f32) + return false; + if (Bitcast2->getOpcode() != ISD::BITCAST || + Bitcast2->getValueType(0) != MVT::f32) + return false; + + if (Subtarget.isLittleEndian()) + std::swap(Bitcast, Bitcast2); + + // Bitcast has the second float (in memory-layout order) and Bitcast2 + // has the first one. + + SDValue BasePtr = LD->getBasePtr(); + if (LD->isIndexed()) { + assert(LD->getAddressingMode() == ISD::PRE_INC && + "Non-pre-inc AM on PPC?"); + BasePtr = + DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + LD->getOffset()); + } + + auto MMOFlags = + LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile; + SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr, + LD->getPointerInfo(), LD->getAlignment(), + MMOFlags, LD->getAAInfo()); + SDValue AddPtr = + DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), + BasePtr, DAG.getIntPtrConstant(4, dl)); + SDValue FloatLoad2 = DAG.getLoad( + MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr, + LD->getPointerInfo().getWithOffset(4), + MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo()); + + if (LD->isIndexed()) { + // Note that DAGCombine should re-form any pre-increment load(s) from + // what is produced here if that makes sense. + DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr); + } + + DCI.CombineTo(Bitcast2, FloatLoad); + DCI.CombineTo(Bitcast, FloatLoad2); + + DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 
2 : 1), + SDValue(FloatLoad2.getNode(), 1)); + return true; + }; + + if (ReplaceTwoFloatLoad()) + return SDValue(N, 0); + EVT MemVT = LD->getMemoryVT(); Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); @@ -10710,6 +11138,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } break; } + case ISD::BUILD_VECTOR: + return DAGCombineBuildVector(N, DCI); } return SDValue(); @@ -10801,7 +11231,8 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { case PPC::DIR_PWR6: case PPC::DIR_PWR6X: case PPC::DIR_PWR7: - case PPC::DIR_PWR8: { + case PPC::DIR_PWR8: + case PPC::DIR_PWR9: { if (!ML) break; @@ -10812,7 +11243,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { uint64_t LoopSize = 0; for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I) for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) { - LoopSize += TII->GetInstSizeInBytes(J); + LoopSize += TII->GetInstSizeInBytes(*J); if (LoopSize > 32) break; } @@ -10837,6 +11268,7 @@ PPCTargetLowering::getConstraintType(StringRef Constraint) const { case 'b': case 'r': case 'f': + case 'd': case 'v': case 'y': return C_RegisterClass; @@ -10928,6 +11360,10 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, if (VT == MVT::i64 && Subtarget.isPPC64()) return std::make_pair(0U, &PPC::G8RCRegClass); return std::make_pair(0U, &PPC::GPRCRegClass); + // 'd' and 'f' constraints are both defined to be "the floating point + // registers", where one is for 32-bit and the other for 64-bit. We don't + // really care overly much here so just give them all the same reg classes. + case 'd': case 'f': if (VT == MVT::f32 || VT == MVT::i32) return std::make_pair(0U, &PPC::F4RCRegClass); @@ -11126,13 +11562,13 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, isPPC64 ? MVT::i64 : MVT::i32); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), - MachinePointerInfo(), false, false, false, 0); + MachinePointerInfo()); } // Just load the return address off the stack. SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, - MachinePointerInfo(), false, false, false, 0); + MachinePointerInfo()); } SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, @@ -11144,7 +11580,7 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); + EVT PtrVT = getPointerTy(MF.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; // Naked functions never have a frame pointer, and so we use r1. 
For all @@ -11159,8 +11595,7 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, PtrVT); while (Depth--) FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), - FrameAddr, MachinePointerInfo(), false, false, - false, 0); + FrameAddr, MachinePointerInfo()); return FrameAddr; } @@ -11567,10 +12002,8 @@ PPCTargetLowering::shouldExpandBuildVectorWithShuffles( if (VT == MVT::v2i64) return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves - if (Subtarget.hasQPX()) { - if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1) - return true; - } + if (Subtarget.hasVSX() || Subtarget.hasQPX()) + return true; return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); } @@ -11588,3 +12021,70 @@ PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const { return PPC::createFastISel(FuncInfo, LibInfo); } + +void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { + if (Subtarget.isDarwinABI()) return; + if (!Subtarget.isPPC64()) return; + + // Update IsSplitCSR in PPCFunctionInfo + PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>(); + PFI->setIsSplitCSR(true); +} + +void PPCTargetLowering::insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl<MachineBasicBlock *> &Exits) const { + const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); + if (!IStart) + return; + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); + MachineBasicBlock::iterator MBBI = Entry->begin(); + for (const MCPhysReg *I = IStart; *I; ++I) { + const TargetRegisterClass *RC = nullptr; + if (PPC::G8RCRegClass.contains(*I)) + RC = &PPC::G8RCRegClass; + else if (PPC::F8RCRegClass.contains(*I)) + RC = &PPC::F8RCRegClass; + else if (PPC::CRRCRegClass.contains(*I)) + RC = &PPC::CRRCRegClass; + else if (PPC::VRRCRegClass.contains(*I)) + RC = &PPC::VRRCRegClass; + else + llvm_unreachable("Unexpected register class in CSRsViaCopy!"); + + unsigned NewVR = MRI->createVirtualRegister(RC); + // Create copy from CSR to a virtual register. + // FIXME: this currently does not emit CFI pseudo-instructions, it works + // fine for CXX_FAST_TLS since the C++-style TLS access functions should be + // nounwind. If we want to generalize this later, we may need to emit + // CFI pseudo-instructions. + assert(Entry->getParent()->getFunction()->hasFnAttribute( + Attribute::NoUnwind) && + "Function should be nounwind in insertCopiesSplitCSR!"); + Entry->addLiveIn(*I); + BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) + .addReg(*I); + + // Insert the copy-back instructions right before the terminator + for (auto *Exit : Exits) + BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), *I) + .addReg(NewVR); + } +} + +// Override to enable LOAD_STACK_GUARD lowering on Linux. +bool PPCTargetLowering::useLoadStackGuardNode() const { + if (!Subtarget.isTargetLinux()) + return TargetLowering::useLoadStackGuardNode(); + return true; +} + +// Override to disable global variable loading on Linux. 
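The initializeSplitCSR/insertCopiesSplitCSR pair above enables split callee-saved-register handling for CXX_FAST_TLS functions on 64-bit non-Darwin subtargets (the gating predicate, supportSplitCSR, appears in the header hunk further down). For illustration, the kind of source that produces such a function; whether clang actually assigns the CXX_FAST_TLS convention to the TLS wrapper here is a frontend and target decision, not something this diff establishes:

    // A dynamically initialized thread_local produces a TLS wrapper
    // function; when that wrapper is CXX_FAST_TLS and nounwind, the
    // split-CSR code above preserves extra registers via virtual-register
    // copies at entry/exit instead of ordinary spills.
    struct Counter { Counter(); int N; };
    thread_local Counter TLCounter;
    int bump() { return ++TLCounter.N; } // access goes through the wrapper

The Linux-only insertSSPDeclarations override then completes the .cpp hunk: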
+void PPCTargetLowering::insertSSPDeclarations(Module &M) const { + if (!Subtarget.isTargetLinux()) + return TargetLowering::insertSSPDeclarations(M); +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h index 44bcb8942cfc..e3be8074e62e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -61,6 +61,18 @@ namespace llvm { /// VPERM, + /// XXSPLT - The PPC VSX splat instructions + /// + XXSPLT, + + /// XXINSERT - The PPC VSX insert instruction + /// + XXINSERT, + + /// VECSHL - The PPC VSX shift left instruction + /// + VECSHL, + /// The CMPB instruction (takes two operands of i32 or i64). CMPB, @@ -133,6 +145,16 @@ namespace llvm { /// Direct move from a GPR to a VSX register (zero) MTVSRZ, + /// Extract a subvector from signed integer vector and convert to FP. + /// It is primarily used to convert a (widened) illegal integer vector + /// type to a legal floating point vector type. + /// For example v2i32 -> widened to v4i32 -> v2f64 + SINT_VEC_TO_FP, + + /// Extract a subvector from unsigned integer vector and convert to FP. + /// As with SINT_VEC_TO_FP, used for converting illegal types. + UINT_VEC_TO_FP, + // FIXME: Remove these once the ANDI glue bug is fixed: /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the /// eq or gt bit of CR0 after executing andi. x, 1. This is used to @@ -297,6 +319,10 @@ namespace llvm { /// of outputs. XXSWAPD, + /// An SDNode for swaps that are not associated with any loads/stores + /// and thereby have no chain. + SWAP_NO_CHAIN, + /// QVFPERM = This corresponds to the QPX qvfperm instruction. QVFPERM, @@ -402,6 +428,16 @@ namespace llvm { /// VSPLTB/VSPLTH/VSPLTW. bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); + /// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by + /// the XXINSERTW instruction introduced in ISA 3.0. This is essentially any + /// shuffle of v4f32/v4i32 vectors that just inserts one element from one + /// vector into the other. This function will also set a couple of + /// output parameters for how much the source vector needs to be shifted and + /// what byte number needs to be specified for the instruction to put the + /// element in the desired location of the target vector. + bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + unsigned &InsertAtByte, bool &Swap, bool IsLE); + /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG); @@ -428,6 +464,20 @@ namespace llvm { /// DAG node. const char *getTargetNodeName(unsigned Opcode) const override; + /// getPreferredVectorAction - The code we generate when vector types are + /// legalized by promoting the integer element type is often much worse + /// than code we generate if we widen the type for applicable vector types. + /// The issue with promoting is that the vector is scalarized, individual + /// elements promoted and then the vector is rebuilt. So say we load a pair + /// of v4i8's and shuffle them. This will turn into a mess of 8 extending + /// loads, moves back into VSR's (or memory ops if we don't have moves) and + /// then the VPERM for the shuffle. All in all a very slow sequence. 
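To make the v4i8 example in that comment concrete: two 4-byte vector loads feeding a shuffle, which widening keeps as a single permute while element promotion would scalarize into eight extending loads and a vector rebuild. A clang-flavored illustration (__builtin_shufflevector and vector_size are compiler extensions):

    #include <cstdint>

    typedef uint8_t v4i8 __attribute__((vector_size(4)));

    // With TypeWidenVector, v4i8 is widened toward a legal vector type and
    // the shuffle survives as one permute instead of being scalarized.
    v4i8 interleave_low(const v4i8 *a, const v4i8 *b) {
      v4i8 x = *a, y = *b;
      return __builtin_shufflevector(x, y, 0, 4, 1, 5);
    }

The hook itself follows: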
+ TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) + const override { + if (VT.getVectorElementType().getSizeInBits() % 8 == 0) + return TypeWidenVector; + return TargetLoweringBase::getPreferredVectorAction(VT); + } bool useSoftFloat() const override; MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { @@ -442,6 +492,18 @@ namespace llvm { return true; } + bool supportSplitCSR(MachineFunction *MF) const override { + return + MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && + MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); + } + + void initializeSplitCSR(MachineBasicBlock *Entry) const override; + + void insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; + /// getSetCCResultType - Return the ISD::SETCC ValueType EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; @@ -508,26 +570,31 @@ namespace llvm { unsigned getPrefLoopAlignment(MachineLoop *ML) const override; + bool shouldInsertFencesForAtomic(const Instruction *I) const override { + return true; + } + Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const override; Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const override; MachineBasicBlock * - EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const override; - MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *MBB) const override; + MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode) const; - MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI, + MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, - bool is8bit, unsigned Opcode) const; + bool is8bit, + unsigned Opcode) const; - MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI, + MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const; - MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, + MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const; ConstraintType getConstraintType(StringRef Constraint) const override; @@ -672,6 +739,10 @@ namespace llvm { unsigned getExceptionSelectorRegister(const Constant *PersonalityFn) const override; + /// Override to support customized stack guard loading. 
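On Linux these two overrides (implemented in the .cpp hunks above) move stack-protector checks off the __stack_chk_guard global and onto the LOAD_STACK_GUARD pseudo, which is eventually selected to a load from the thread control block. A hand-written equivalent of that final access; the slot offsets are the conventional glibc values, stated here as an assumption rather than read out of this diff:

    // Not compiler output: illustrates the access LOAD_STACK_GUARD becomes.
    // glibc keeps the guard at r13 - 0x7010 on ppc64 (r2 - 0x7008 on ppc32).
    static inline unsigned long ppc64_stack_guard() {
      unsigned long G;
      asm("ld %0, -0x7010(13)" : "=r"(G));
      return G;
    }

The declarations follow: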
+ bool useLoadStackGuardNode() const override; + void insertSSPDeclarations(Module &M) const override; + private: struct ReuseLoadInfo { SDValue Ptr; @@ -693,11 +764,11 @@ namespace llvm { SelectionDAG &DAG) const; void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, - SelectionDAG &DAG, SDLoc dl) const; + SelectionDAG &DAG, const SDLoc &dl) const; SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, - SDLoc dl) const; + const SDLoc &dl) const; SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG, - SDLoc dl) const; + const SDLoc &dl) const; SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; @@ -709,13 +780,20 @@ namespace llvm { const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const; - SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, - int SPDiff, - SDValue Chain, - SDValue &LROpOut, + bool + IsEligibleForTailCallOptimization_64SVR4( + SDValue Callee, + CallingConv::ID CalleeCC, + ImmutableCallSite *CS, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const; + + SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG, int SPDiff, + SDValue Chain, SDValue &LROpOut, SDValue &FPOpOut, - bool isDarwinABI, - SDLoc dl) const; + const SDLoc &dl) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; @@ -727,23 +805,18 @@ namespace llvm { SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget) const; - SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget) const; - SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget) const; - SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget) const; - SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget) const; - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const; + SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, + const SDLoc &dl) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; @@ -763,26 +836,23 @@ namespace llvm { SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, + const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> 
&InVals) const; - SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall, - bool isVarArg, bool IsPatchPoint, bool hasNest, - SelectionDAG &DAG, - SmallVector<std::pair<unsigned, SDValue>, 8> - &RegsToPass, + SDValue FinishCall(CallingConv::ID CallConv, const SDLoc &dl, + bool isTailCall, bool isVarArg, bool isPatchPoint, + bool hasNest, SelectionDAG &DAG, + SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag, SDValue Chain, SDValue CallSeqStart, - SDValue &Callee, - int SPDiff, unsigned NumBytes, + SDValue &Callee, int SPDiff, unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const; SDValue - LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const override; + LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const override; SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, @@ -794,75 +864,66 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const override; - SDValue - LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - SDLoc dl, SelectionDAG &DAG) const override; - - SDValue - extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG, - SDValue ArgVal, SDLoc dl) const; - - SDValue - LowerFormalArguments_Darwin(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; - SDValue - LowerFormalArguments_64SVR4(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; - SDValue - LowerFormalArguments_32SVR4(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; - - SDValue - createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, - SDValue CallSeqStart, ISD::ArgFlagsTy Flags, - SelectionDAG &DAG, SDLoc dl) const; - - SDValue - LowerCall_Darwin(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, - bool isVarArg, bool isTailCall, bool IsPatchPoint, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals, - ImmutableCallSite *CS) const; - SDValue - LowerCall_64SVR4(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, - bool isVarArg, bool isTailCall, bool IsPatchPoint, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals, - ImmutableCallSite *CS) const; - SDValue - LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, - bool isVarArg, bool isTailCall, bool IsPatchPoint, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals, - 
ImmutableCallSite *CS) const; + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SDLoc &dl, SelectionDAG &DAG) const override; + + SDValue extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, + SelectionDAG &DAG, SDValue ArgVal, + const SDLoc &dl) const; + + SDValue LowerFormalArguments_Darwin( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + SDValue LowerFormalArguments_64SVR4( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + SDValue LowerFormalArguments_32SVR4( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + + SDValue createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, + SDValue CallSeqStart, + ISD::ArgFlagsTy Flags, SelectionDAG &DAG, + const SDLoc &dl) const; + + SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, bool isPatchPoint, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals, + ImmutableCallSite *CS) const; + SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, bool isPatchPoint, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals, + ImmutableCallSite *CS) const; + SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, bool isPatchPoint, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals, + ImmutableCallSite *CS) const; SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const; SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 79e4fe379c2d..e7eb8a16180a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -244,12 +244,22 @@ def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr), // (EH=1 - see Power ISA 2.07 Book II 4.4.2) def LDARXL : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr), "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isDOT; + +let hasExtraDefRegAllocReq = 1 in +def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$rD), (ins g8rc:$rA, u5imm:$FC), + "ldat $rD, $rA, $FC", IIC_LdStLoad>, isPPC64, + Requires<[IsISA3_0]>; } let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in def STDCX : XForm_1<31, 214, (outs), 
(ins g8rc:$rS, memrr:$dst), "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isDOT; +let mayStore = 1, hasSideEffects = 0 in +def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC), + "stdat $rS, $rA, $FC", IIC_LdStStore>, isPPC64, + Requires<[IsISA3_0]>; + let Interpretation64Bit = 1, isCodeGenOnly = 1 in { let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in def TCRETURNdi8 :Pseudo< (outs), @@ -476,8 +486,10 @@ defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "add", "$rT, $rA, $rB", IIC_IntSimple, [(set i64:$rT, (add i64:$rA, i64:$rB))]>; // ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the -// initial-exec thread-local storage model. -def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB), +// initial-exec thread-local storage model. We need to forbid r0 here - +// while it works for add just fine, the linker can relax this to local-exec +// addi, which won't work for r0. +def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc_nox0:$rA, tlsreg:$rB), "add $rT, $rA, $rB", IIC_IntSimple, [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>; @@ -502,11 +514,11 @@ let Defs = [CARRY] in { def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), "subfic $rD, $rA, $imm", IIC_IntGeneral, [(set i64:$rD, (subc imm64SExt16:$imm, i64:$rA))]>; -defm SUBFC8 : XOForm_1r<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), +} +defm SUBFC8 : XOForm_1rc<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "subfc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (subc i64:$rB, i64:$rA))]>, PPC970_DGroup_Cracked; -} defm SUBF8 : XOForm_1r<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "subf", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (sub i64:$rB, i64:$rA))]>; @@ -564,6 +576,14 @@ let isCompare = 1, hasSideEffects = 0 in { def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm64:$src2), "cmpldi $dst, $src1, $src2", IIC_IntCompare>, isPPC64; + let Interpretation64Bit = 1, isCodeGenOnly = 1 in + def CMPRB8 : X_BF3_L1_RS5_RS5<31, 192, (outs crbitrc:$BF), + (ins u1imm:$L, g8rc:$rA, g8rc:$rB), + "cmprb $BF, $L, $rA, $rB", IIC_IntCompare, []>, + Requires<[IsISA3_0]>; + def CMPEQB : X_BF3_RS5_RS5<31, 224, (outs crbitrc:$BF), + (ins g8rc:$rA, g8rc:$rB), "cmpeqb $BF, $rA, $rB", + IIC_IntCompare, []>, Requires<[IsISA3_0]>; } let hasSideEffects = 0 in { @@ -580,6 +600,9 @@ defm SRAD : XForm_6rc<31, 794, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), let Interpretation64Bit = 1, isCodeGenOnly = 1 in { defm CNTLZW8 : XForm_11r<31, 26, (outs g8rc:$rA), (ins g8rc:$rS), "cntlzw", "$rA, $rS", IIC_IntGeneral, []>; +defm CNTTZW8 : XForm_11r<31, 538, (outs g8rc:$rA), (ins g8rc:$rS), + "cnttzw", "$rA, $rS", IIC_IntGeneral, []>, + Requires<[IsISA3_0]>; defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS), "extsb", "$rA, $rS", IIC_IntSimple, @@ -613,9 +636,12 @@ defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$rA), (ins gprc:$rS), defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH), "sradi", "$rA, $rS, $SH", IIC_IntRotateDI, [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64; -defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS), +defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS), "cntlzd", "$rA, $rS", IIC_IntGeneral, [(set i64:$rA, (ctlz i64:$rS))]>; +defm CNTTZD : XForm_11r<31, 570, (outs g8rc:$rA), (ins g8rc:$rS), + "cnttzd", "$rA, $rS", IIC_IntGeneral, + [(set i64:$rA, (cttz i64:$rS))]>, Requires<[IsISA3_0]>; 
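An aside on the ADD8TLS hunk above: the reason r0 must be forbidden is an asymmetry in the Power ISA itself. add always reads its RA operand as a register, while addi treats RA == 0 as the literal value 0. The sketch below is illustrative C++ modelling those ISA semantics (not code from this patch); it shows why the linker's initial-exec to local-exec relaxation, which rewrites the add with sym@tls into an addi with a tprel offset, silently changes the result when RA is r0:

#include <cstdint>

// Illustration only: C++ models of the Power ISA semantics, not LLVM code.
// "add rT, rA, rB" always reads rA as a register:
uint64_t addSemantics(const uint64_t GPR[32], unsigned RA, unsigned RB) {
  return GPR[RA] + GPR[RB];
}

// "addi rT, rA, SI" computes (RA|0) + SI: an RA of 0 means the literal
// value zero, not the contents of r0, so relaxing add -> addi is only
// sound when RA is known not to be r0:
uint64_t addiSemantics(const uint64_t GPR[32], unsigned RA, int64_t SI) {
  return (RA == 0 ? 0 : GPR[RA]) + SI;
}

Switching the operand class to g8rc_nox0, as the hunk does, keeps the register allocator from ever handing the relaxable add an r0 base.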
def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS), "popcntd $rA, $rS", IIC_IntGeneral, [(set i64:$rA, (ctpop i64:$rS))]>; @@ -905,6 +931,10 @@ def LDUX : XForm_1<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), "ldux $rD, $addr", IIC_LdStLDUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; + +def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins memrr:$src), + "ldmx $rD, $src", IIC_LdStLD, []>, isPPC64, + Requires<[IsISA3_0]>; } } @@ -1246,3 +1276,24 @@ def : Pat<(atomic_load_64 xaddr:$src), (LDX memrr:$src)>; def : Pat<(atomic_store_64 ixaddr:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr)>; def : Pat<(atomic_store_64 xaddr:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>; + +let Predicates = [IsISA3_0] in { + +class X_L1_RA5_RB5<bits<6> opcode, bits<10> xo, string opc, RegisterOperand ty, + InstrItinClass itin, list<dag> pattern> + : X_L1_RS5_RS5<opcode, xo, (outs), (ins ty:$rA, ty:$rB, u1imm:$L), + !strconcat(opc, " $rA, $rB, $L"), itin, pattern>; + +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { +def CP_COPY8 : X_L1_RA5_RB5<31, 774, "copy" , g8rc, IIC_LdStCOPY, []>; +def CP_PASTE8 : X_L1_RA5_RB5<31, 902, "paste" , g8rc, IIC_LdStPASTE, []>; +def CP_PASTE8o : X_L1_RA5_RB5<31, 902, "paste.", g8rc, IIC_LdStPASTE, []>,isDOT; +} + +// SLB Invalidate Entry Global +def SLBIEG : XForm_26<31, 466, (outs), (ins gprc:$RS, gprc:$RB), + "slbieg $RS, $RB", IIC_SprSLBIEG, []>; +// SLB Synchronize +def SLBSYNC : XForm_0<31, 338, (outs), (ins), "slbsync", IIC_SprSLBSYNC, []>; + +} // IsISA3_0 diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index 53674681b213..e1c4673c2d7f 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -1213,3 +1213,187 @@ def VNCIPHERLAST : VX1_Int_Ty<1353, "vncipherlast", int_ppc_altivec_crypto_vncipherlast, v2i64>; def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>; } // HasP8Crypto + +// The following altivec instructions were introduced in Power ISA 3.0 +def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">; +let Predicates = [HasP9Altivec] in { + +// Vector Compare Not Equal (Zero) +class P9VCMP<bits<10> xo, string asmstr, ValueType Ty> + : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr, + IIC_VecFPCompare, []>; +class P9VCMPo<bits<10> xo, string asmstr, ValueType Ty> + : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr, + IIC_VecFPCompare, []> { + let Defs = [CR6]; + let RC = 1; +} + +// i8 element comparisons. +def VCMPNEB : P9VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>; +def VCMPNEBo : P9VCMPo< 7, "vcmpneb. $vD, $vA, $vB" , v16i8>; +def VCMPNEZB : P9VCMP <263, "vcmpnezb $vD, $vA, $vB" , v16i8>; +def VCMPNEZBo : P9VCMPo<263, "vcmpnezb. $vD, $vA, $vB", v16i8>; + +// i16 element comparisons. +def VCMPNEH : P9VCMP < 71, "vcmpneh $vD, $vA, $vB" , v8i16>; +def VCMPNEHo : P9VCMPo< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>; +def VCMPNEZH : P9VCMP <327, "vcmpnezh $vD, $vA, $vB" , v8i16>; +def VCMPNEZHo : P9VCMPo<327, "vcmpnezh. $vD, $vA, $vB", v8i16>; + +// i32 element comparisons. +def VCMPNEW : P9VCMP <135, "vcmpnew $vD, $vA, $vB" , v4i32>; +def VCMPNEWo : P9VCMPo<135, "vcmpnew. $vD, $vA, $vB" , v4i32>; +def VCMPNEZW : P9VCMP <391, "vcmpnezw $vD, $vA, $vB" , v4i32>; +def VCMPNEZWo : P9VCMPo<391, "vcmpnezw. $vD, $vA, $vB", v4i32>; + +// VX-Form: [PO VRT / UIM VRB XO]. 
+// We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent +// "/ UIM" (1 + 4 bit) +class VX1_VT5_UIM5_VB5<bits<11> xo, string opc, list<dag> pattern> + : VXForm_1<xo, (outs vrrc:$vD), (ins u4imm:$UIMM, vrrc:$vB), + !strconcat(opc, " $vD, $vB, $UIMM"), IIC_VecGeneral, pattern>; + +class VX1_RT5_RA5_VB5<bits<11> xo, string opc, list<dag> pattern> + : VXForm_1<xo, (outs g8rc:$rD), (ins g8rc:$rA, vrrc:$vB), + !strconcat(opc, " $rD, $rA, $vB"), IIC_VecGeneral, pattern>; + +// Vector Extract Unsigned +def VEXTRACTUB : VX1_VT5_UIM5_VB5<525, "vextractub", []>; +def VEXTRACTUH : VX1_VT5_UIM5_VB5<589, "vextractuh", []>; +def VEXTRACTUW : VX1_VT5_UIM5_VB5<653, "vextractuw", []>; +def VEXTRACTD : VX1_VT5_UIM5_VB5<717, "vextractd" , []>; + +// Vector Extract Unsigned Byte/Halfword/Word Left/Right-Indexed +def VEXTUBLX : VX1_RT5_RA5_VB5<1549, "vextublx", []>; +def VEXTUBRX : VX1_RT5_RA5_VB5<1805, "vextubrx", []>; +def VEXTUHLX : VX1_RT5_RA5_VB5<1613, "vextuhlx", []>; +def VEXTUHRX : VX1_RT5_RA5_VB5<1869, "vextuhrx", []>; +def VEXTUWLX : VX1_RT5_RA5_VB5<1677, "vextuwlx", []>; +def VEXTUWRX : VX1_RT5_RA5_VB5<1933, "vextuwrx", []>; + +// Vector Insert Element Instructions +def VINSERTB : VX1_VT5_UIM5_VB5<781, "vinsertb", []>; +def VINSERTH : VX1_VT5_UIM5_VB5<845, "vinserth", []>; +def VINSERTW : VX1_VT5_UIM5_VB5<909, "vinsertw", []>; +def VINSERTD : VX1_VT5_UIM5_VB5<973, "vinsertd", []>; + +class VX_VT5_EO5_VB5<bits<11> xo, bits<5> eo, string opc, list<dag> pattern> + : VXForm_RD5_XO5_RS5<xo, eo, (outs vrrc:$vD), (ins vrrc:$vB), + !strconcat(opc, " $vD, $vB"), IIC_VecGeneral, pattern>; + +// Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD] +def VCLZLSBB : VXForm_RD5_XO5_RS5<1538, 0, (outs g8rc:$rD), (ins vrrc:$vB), + "vclzlsbb $rD, $vB", IIC_VecGeneral, []>; +def VCTZLSBB : VXForm_RD5_XO5_RS5<1538, 1, (outs g8rc:$rD), (ins vrrc:$vB), + "vctzlsbb $rD, $vB", IIC_VecGeneral, []>; +// Vector Count Trailing Zeros +def VCTZB : VX_VT5_EO5_VB5<1538, 28, "vctzb", []>; +def VCTZH : VX_VT5_EO5_VB5<1538, 29, "vctzh", []>; +def VCTZW : VX_VT5_EO5_VB5<1538, 30, "vctzw", []>; +def VCTZD : VX_VT5_EO5_VB5<1538, 31, "vctzd", []>; + +// Vector Extend Sign +def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w", []>; +def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w", []>; +def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d", []>; +def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d", []>; +def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d", []>; + +// Vector Integer Negate +def VNEGW : VX_VT5_EO5_VB5<1538, 6, "vnegw", []>; +def VNEGD : VX_VT5_EO5_VB5<1538, 7, "vnegd", []>; + +// Vector Parity Byte +def VPRTYBW : VX_VT5_EO5_VB5<1538, 8, "vprtybw", []>; +def VPRTYBD : VX_VT5_EO5_VB5<1538, 9, "vprtybd", []>; +def VPRTYBQ : VX_VT5_EO5_VB5<1538, 10, "vprtybq", []>; + +// Vector (Bit) Permute (Right-indexed) +def VBPERMD : VXForm_1<1484, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vbpermd $vD, $vA, $vB", IIC_VecFP, []>; +def VPERMR : VAForm_1a<59, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), + "vpermr $vD, $vA, $vB, $vC", IIC_VecFP, []>; + +class VX1_VT5_VA5_VB5<bits<11> xo, string opc, list<dag> pattern> + : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern>; + +// Vector Rotate Left Mask/Mask-Insert +def VRLWNM : VX1_VT5_VA5_VB5<389, "vrlwnm", []>; +def VRLWMI : VX1_VT5_VA5_VB5<133, "vrlwmi", []>; +def VRLDNM : VX1_VT5_VA5_VB5<453, "vrldnm", []>; +def VRLDMI : VX1_VT5_VA5_VB5<197, "vrldmi", []>; + +// Vector Shift 
Left/Right +def VSLV : VX1_VT5_VA5_VB5<1860, "vslv", []>; +def VSRV : VX1_VT5_VA5_VB5<1796, "vsrv", []>; + +// Vector Multiply-by-10 (& Write Carry) Unsigned Quadword +def VMUL10UQ : VXForm_BX<513, (outs vrrc:$vD), (ins vrrc:$vA), + "vmul10uq $vD, $vA", IIC_VecFP, []>; +def VMUL10CUQ : VXForm_BX< 1, (outs vrrc:$vD), (ins vrrc:$vA), + "vmul10cuq $vD, $vA", IIC_VecFP, []>; + +// Vector Multiply-by-10 Extended (& Write Carry) Unsigned Quadword +def VMUL10EUQ : VX1_VT5_VA5_VB5<577, "vmul10euq" , []>; +def VMUL10ECUQ : VX1_VT5_VA5_VB5< 65, "vmul10ecuq", []>; + +// Decimal Integer Format Conversion Instructions + +// [PO VRT EO VRB 1 PS XO], "_o" means CR6 is set. +class VX_VT5_EO5_VB5_PS1_XO9_o<bits<5> eo, bits<9> xo, string opc, + list<dag> pattern> + : VX_RD5_EO5_RS5_PS1_XO9<eo, xo, (outs vrrc:$vD), (ins vrrc:$vB, u1imm:$PS), + !strconcat(opc, " $vD, $vB, $PS"), IIC_VecFP, pattern> { + let Defs = [CR6]; +} + +// [PO VRT EO VRB 1 / XO] +class VX_VT5_EO5_VB5_XO9_o<bits<5> eo, bits<9> xo, string opc, + list<dag> pattern> + : VX_RD5_EO5_RS5_PS1_XO9<eo, xo, (outs vrrc:$vD), (ins vrrc:$vB), + !strconcat(opc, " $vD, $vB"), IIC_VecFP, pattern> { + let Defs = [CR6]; + let PS = 0; +} + +// Decimal Convert From/to National/Zoned/Signed-QWord +def BCDCFNo : VX_VT5_EO5_VB5_PS1_XO9_o<7, 385, "bcdcfn." , []>; +def BCDCFZo : VX_VT5_EO5_VB5_PS1_XO9_o<6, 385, "bcdcfz." , []>; +def BCDCTNo : VX_VT5_EO5_VB5_XO9_o <5, 385, "bcdctn." , []>; +def BCDCTZo : VX_VT5_EO5_VB5_PS1_XO9_o<4, 385, "bcdctz." , []>; +def BCDCFSQo : VX_VT5_EO5_VB5_PS1_XO9_o<2, 385, "bcdcfsq.", []>; +def BCDCTSQo : VX_VT5_EO5_VB5_XO9_o <0, 385, "bcdctsq.", []>; + +// Decimal Copy-Sign/Set-Sign +let Defs = [CR6] in +def BCDCPSGNo : VX1_VT5_VA5_VB5<833, "bcdcpsgn.", []>; + +def BCDSETSGNo : VX_VT5_EO5_VB5_PS1_XO9_o<31, 385, "bcdsetsgn.", []>; + +// [PO VRT VRA VRB 1 PS XO], "_o" means CR6 is set. +class VX_VT5_VA5_VB5_PS1_XO9_o<bits<9> xo, string opc, list<dag> pattern> + : VX_RD5_RSp5_PS1_XO9<xo, + (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u1imm:$PS), + !strconcat(opc, " $vD, $vA, $vB, $PS"), IIC_VecFP, pattern> { + let Defs = [CR6]; +} + +// [PO VRT VRA VRB 1 / XO] +class VX_VT5_VA5_VB5_XO9_o<bits<9> xo, string opc, list<dag> pattern> + : VX_RD5_RSp5_PS1_XO9<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern> { + let Defs = [CR6]; + let PS = 0; +} + +// Decimal Shift/Unsigned-Shift/Shift-and-Round +def BCDSo : VX_VT5_VA5_VB5_PS1_XO9_o<193, "bcds." , []>; +def BCDUSo : VX_VT5_VA5_VB5_XO9_o <129, "bcdus.", []>; +def BCDSRo : VX_VT5_VA5_VB5_PS1_XO9_o<449, "bcdsr.", []>; + +// Decimal (Unsigned) Truncate +def BCDTRUNCo : VX_VT5_VA5_VB5_PS1_XO9_o<257, "bcdtrunc." 
, []>; +def BCDUTRUNCo : VX_VT5_VA5_VB5_XO9_o <321, "bcdutrunc.", []>; +} // end HasP9Altivec diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td index 4e03ed27653f..5acff7559544 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -360,6 +360,21 @@ class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, let Inst{30-31} = xo; } +// DQ-Form: [PO T RA DQ TX XO] or [PO S RA DQ SX XO] +class DQ_RD6_RS5_DQ12<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<17> DS_RA; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = DS_RA{16-12}; // Register # + let Inst{16-27} = DS_RA{11-0}; // Displacement. + let Inst{28} = XT{5}; + let Inst{29-31} = xo; +} // 1.7.6 X-Form class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, @@ -747,6 +762,107 @@ class XForm_htm3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = RC; } +// [PO RT RA RB XO /] +class X_BF3_L1_RS5_RS5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + bits<1> L; + bits<5> RA; + bits<5> RB; + + let Pattern = pattern; + + let Inst{6-8} = BF; + let Inst{9} = 0; + let Inst{10} = L; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +// Same as XForm_17 but with GPR's and new naming convention +class X_BF3_RS5_RS5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + bits<5> RA; + bits<5> RB; + + let Pattern = pattern; + + let Inst{6-8} = BF; + let Inst{9-10} = 0; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +// e.g. [PO VRT XO VRB XO /] or [PO VRT XO VRB XO RO] +class X_RD5_XO5_RS5<bits<6> opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let A = xo2; +} + +class X_BF3_DCMX7_RS5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + bits<7> DCMX; + bits<5> VB; + + let Pattern = pattern; + + let Inst{6-8} = BF; + let Inst{9-15} = DCMX; + let Inst{16-20} = VB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class X_RD6_IMM8<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<8> IMM8; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-12} = 0; + let Inst{13-20} = IMM8; + let Inst{21-30} = xo; + let Inst{31} = XT{5}; +} + +// XForm_base_r3xo for instructions such as P9 atomics where we don't want +// to specify an SDAG pattern for matching. 
+class X_RD5_RS5_IM5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, []> { +} + +class X_BF3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : XForm_17<opcode, xo, OOL, IOL, asmstr, itin> { + let FRA = 0; + let FRB = 0; +} + +// [PO /// L RA RB XO /] +class X_L1_RS5_RS5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : XForm_16<opcode, xo, OOL, IOL, asmstr, itin> { + let BF = 0; + let Pattern = pattern; + + bit RC = 0; + let Inst{31} = RC; +} + // XX*-Form (VSX) class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> @@ -820,6 +936,95 @@ class XX2Form_2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = XT{5}; } +class XX2_RD6_UIM5_RS6<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XB; + bits<5> UIM5; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = UIM5; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +// [PO T XO B XO BX /] +class XX2_RD5_XO5_RS6<bits<6> opcode, bits<5> xo2, bits<9> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-10} = RT; + let Inst{11-15} = xo2; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = 0; +} + +// [PO T XO B XO BX TX] +class XX2_RD6_XO5_RS6<bits<6> opcode, bits<5> xo2, bits<9> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = xo2; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX2_BF3_DCMX7_RS6<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + bits<7> DCMX; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-8} = BF; + let Inst{9-15} = DCMX; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = 0; +} + +class XX2_RD6_DCMX7_RS6<bits<6> opcode, bits<4> xo1, bits<3> xo2, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, + list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<7> DCMX; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = DCMX{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21-24} = xo1; + let Inst{25} = DCMX{5}; + let Inst{26-28} = xo2; + let Inst{29} = DCMX{6}; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : I<opcode, OOL, IOL, asmstr, itin> { @@ -1571,6 +1776,21 @@ class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr, let Inst{21-31} = xo; } +// e.g. 
[PO VRT EO VRB XO] +class VXForm_RD5_XO5_RS5<bits<11> xo, bits<5> eo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> RD; + bits<5> VB; + + let Pattern = pattern; + + let Inst{6-10} = RD; + let Inst{11-15} = eo; + let Inst{16-20} = VB; + let Inst{21-31} = xo; +} + /// VXForm_CR - VX crypto instructions with "VRT, VRA, ST, SIX" class VXForm_CR<bits<11> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> @@ -1622,6 +1842,44 @@ class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{22-31} = xo; } +// VX-Form: [PO VRT EO VRB 1 PS XO] +class VX_RD5_EO5_RS5_PS1_XO9<bits<5> eo, bits<9> xo, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VB; + bit PS; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = eo; + let Inst{16-20} = VB; + let Inst{21} = 1; + let Inst{22} = PS; + let Inst{23-31} = xo; +} + +// VX-Form: [PO VRT VRA VRB 1 PS XO] or [PO VRT VRA VRB 1 / XO] +class VX_RD5_RSp5_PS1_XO9<bits<9> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + bits<5> VB; + bit PS; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16-20} = VB; + let Inst{21} = 1; + let Inst{22} = PS; + let Inst{23-31} = xo; +} + // Z23-Form (used by QPX) class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index dcff6ad2486f..b6ae70ec1a2d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -93,6 +93,7 @@ PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, unsigned Directive = DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective(); + // FIXME: Leaving this as-is until we have POWER9 scheduling info if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8) return new PPCDispatchGroupSBHazardRecognizer(II, DAG); @@ -108,7 +109,7 @@ PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, } unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI, + const MachineInstr &MI, unsigned *PredCost) const { if (!ItinData || UseOldLatencyCalc) return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost); @@ -121,9 +122,9 @@ unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, // is an output). 
unsigned Latency = 1; - unsigned DefClass = MI->getDesc().getSchedClass(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + unsigned DefClass = MI.getDesc().getSchedClass(); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef() || MO.isImplicit()) continue; @@ -138,22 +139,22 @@ unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, } int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, + const MachineInstr &DefMI, unsigned DefIdx, + const MachineInstr &UseMI, unsigned UseIdx) const { int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); - if (!DefMI->getParent()) + if (!DefMI.getParent()) return Latency; - const MachineOperand &DefMO = DefMI->getOperand(DefIdx); + const MachineOperand &DefMO = DefMI.getOperand(DefIdx); unsigned Reg = DefMO.getReg(); bool IsRegCR; if (TargetRegisterInfo::isVirtualRegister(Reg)) { const MachineRegisterInfo *MRI = - &DefMI->getParent()->getParent()->getRegInfo(); + &DefMI.getParent()->getParent()->getRegInfo(); IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) || MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass); } else { @@ -161,7 +162,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, PPC::CRBITRCRegClass.contains(Reg); } - if (UseMI->isBranch() && IsRegCR) { + if (UseMI.isBranch() && IsRegCR) { if (Latency < 0) Latency = getInstrLatency(ItinData, DefMI); @@ -181,6 +182,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case PPC::DIR_PWR6X: case PPC::DIR_PWR7: case PPC::DIR_PWR8: + // FIXME: Is this needed for POWER9? Latency += 2; break; } @@ -258,10 +260,10 @@ bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, } } -unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, +unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const { // Note: This list must be kept consistent with LoadRegFromStackSlot. - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { default: break; case PPC::LD: case PPC::LWZ: @@ -277,20 +279,20 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, case PPC::RESTORE_VRSAVE: // Check for the operands added by addFrameReference (the immediate is the // offset which defaults to 0). - if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() && - MI->getOperand(2).isFI()) { - FrameIndex = MI->getOperand(2).getIndex(); - return MI->getOperand(0).getReg(); + if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() && + MI.getOperand(2).isFI()) { + FrameIndex = MI.getOperand(2).getIndex(); + return MI.getOperand(0).getReg(); } break; } return 0; } -unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, +unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const { // Note: This list must be kept consistent with StoreRegToStackSlot. - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { default: break; case PPC::STD: case PPC::STW: @@ -306,25 +308,23 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, case PPC::SPILL_VRSAVE: // Check for the operands added by addFrameReference (the immediate is the // offset which defaults to 0). 
- if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() && - MI->getOperand(2).isFI()) { - FrameIndex = MI->getOperand(2).getIndex(); - return MI->getOperand(0).getReg(); + if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() && + MI.getOperand(2).isFI()) { + FrameIndex = MI.getOperand(2).getIndex(); + return MI.getOperand(0).getReg(); } break; } return 0; } -MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr *MI, - bool NewMI, +MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const { - MachineFunction &MF = *MI->getParent()->getParent(); + MachineFunction &MF = *MI.getParent()->getParent(); // Normal instructions can be commuted the obvious way. - if (MI->getOpcode() != PPC::RLWIMI && - MI->getOpcode() != PPC::RLWIMIo) + if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMIo) return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because @@ -332,7 +332,7 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr *MI, // to the high-bits of the mask (and, thus, the result). // Cannot commute if it has a non-zero rotate count. - if (MI->getOperand(3).getImm() != 0) + if (MI.getOperand(3).getImm() != 0) return nullptr; // If we have a zero rotate count, we have: @@ -345,28 +345,28 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr *MI, // Swap op1/op2 assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) && "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMIo."); - unsigned Reg0 = MI->getOperand(0).getReg(); - unsigned Reg1 = MI->getOperand(1).getReg(); - unsigned Reg2 = MI->getOperand(2).getReg(); - unsigned SubReg1 = MI->getOperand(1).getSubReg(); - unsigned SubReg2 = MI->getOperand(2).getSubReg(); - bool Reg1IsKill = MI->getOperand(1).isKill(); - bool Reg2IsKill = MI->getOperand(2).isKill(); + unsigned Reg0 = MI.getOperand(0).getReg(); + unsigned Reg1 = MI.getOperand(1).getReg(); + unsigned Reg2 = MI.getOperand(2).getReg(); + unsigned SubReg1 = MI.getOperand(1).getSubReg(); + unsigned SubReg2 = MI.getOperand(2).getSubReg(); + bool Reg1IsKill = MI.getOperand(1).isKill(); + bool Reg2IsKill = MI.getOperand(2).isKill(); bool ChangeReg0 = false; // If machine instrs are no longer in two-address forms, update // destination register as well. if (Reg0 == Reg1) { // Must be two address instruction! - assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) && + assert(MI.getDesc().getOperandConstraint(0, MCOI::TIED_TO) && "Expecting a two-address instruction!"); - assert(MI->getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch"); + assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch"); Reg2IsKill = false; ChangeReg0 = true; } // Masks. - unsigned MB = MI->getOperand(4).getImm(); - unsigned ME = MI->getOperand(5).getImm(); + unsigned MB = MI.getOperand(4).getImm(); + unsigned ME = MI.getOperand(5).getImm(); // We can't commute a trivial mask (there is no way to represent an all-zero // mask). @@ -375,40 +375,40 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr *MI, if (NewMI) { // Create a new instruction. - unsigned Reg0 = ChangeReg0 ? 
Reg2 : MI->getOperand(0).getReg(); - bool Reg0IsDead = MI->getOperand(0).isDead(); - return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) - .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead)) - .addReg(Reg2, getKillRegState(Reg2IsKill)) - .addReg(Reg1, getKillRegState(Reg1IsKill)) - .addImm((ME+1) & 31) - .addImm((MB-1) & 31); + unsigned Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg(); + bool Reg0IsDead = MI.getOperand(0).isDead(); + return BuildMI(MF, MI.getDebugLoc(), MI.getDesc()) + .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead)) + .addReg(Reg2, getKillRegState(Reg2IsKill)) + .addReg(Reg1, getKillRegState(Reg1IsKill)) + .addImm((ME + 1) & 31) + .addImm((MB - 1) & 31); } if (ChangeReg0) { - MI->getOperand(0).setReg(Reg2); - MI->getOperand(0).setSubReg(SubReg2); + MI.getOperand(0).setReg(Reg2); + MI.getOperand(0).setSubReg(SubReg2); } - MI->getOperand(2).setReg(Reg1); - MI->getOperand(1).setReg(Reg2); - MI->getOperand(2).setSubReg(SubReg1); - MI->getOperand(1).setSubReg(SubReg2); - MI->getOperand(2).setIsKill(Reg1IsKill); - MI->getOperand(1).setIsKill(Reg2IsKill); + MI.getOperand(2).setReg(Reg1); + MI.getOperand(1).setReg(Reg2); + MI.getOperand(2).setSubReg(SubReg1); + MI.getOperand(1).setSubReg(SubReg2); + MI.getOperand(2).setIsKill(Reg1IsKill); + MI.getOperand(1).setIsKill(Reg2IsKill); // Swap the mask around. - MI->getOperand(4).setImm((ME+1) & 31); - MI->getOperand(5).setImm((MB-1) & 31); - return MI; + MI.getOperand(4).setImm((ME + 1) & 31); + MI.getOperand(5).setImm((MB - 1) & 31); + return &MI; } -bool PPCInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, +bool PPCInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { // For VSX A-Type FMA instructions, it is the first two operands that can be // commuted, however, because the non-encoded tied input operand is listed // first, the operands to swap are actually the second and third. - int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode()); + int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode()); if (AltOpc == -1) return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); @@ -428,6 +428,8 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break; case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break; case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */ + // FIXME: Update when POWER9 scheduling model is ready. + case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break; } DebugLoc DL; @@ -442,7 +444,8 @@ void PPCInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { // Branch analysis. // Note: If the condition register is set to CTR or CTR8 then this is a // BDNZ (imm == 1) or BDZ (imm == 0) branch. -bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, +bool PPCInstrInfo::analyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { @@ -453,14 +456,14 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, if (I == MBB.end()) return false; - if (!isUnpredicatedTerminator(I)) + if (!isUnpredicatedTerminator(*I)) return false; // Get the last instruction in the block. MachineInstr *LastInst = I; // If there is only one terminator instruction, process it. 
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { if (LastInst->getOpcode() == PPC::B) { if (!LastInst->getOperand(0).isMBB()) return true; @@ -522,8 +525,7 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineInstr *SecondLastInst = I; // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && - isUnpredicatedTerminator(--I)) + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I)) return true; // If the block ends with PPC::B and PPC:BCC, handle it. @@ -633,11 +635,11 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 2; } -unsigned -PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - ArrayRef<MachineOperand> Cond, - DebugLoc DL) const { +unsigned PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef<MachineOperand> Cond, + const DebugLoc &DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 2 || Cond.size() == 0) && @@ -721,9 +723,10 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, } void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc dl, - unsigned DestReg, ArrayRef<MachineOperand> Cond, - unsigned TrueReg, unsigned FalseReg) const { + MachineBasicBlock::iterator MI, + const DebugLoc &dl, unsigned DestReg, + ArrayRef<MachineOperand> Cond, unsigned TrueReg, + unsigned FalseReg) const { assert(Cond.size() == 2 && "PPC branch conditions have two components!"); @@ -746,8 +749,8 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL; auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm()); - unsigned SubIdx; - bool SwapOps; + unsigned SubIdx = 0; + bool SwapOps = false; switch (SelectPred) { case PPC::PRED_EQ: case PPC::PRED_EQ_MINUS: @@ -835,9 +838,9 @@ static unsigned getCRBitValue(unsigned CRBit) { } void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { + MachineBasicBlock::iterator I, + const DebugLoc &DL, unsigned DestReg, + unsigned SrcReg, bool KillSrc) const { // We can end up with self copies and similar things as a result of VSX copy // legalization. Promote them here. const TargetRegisterInfo *TRI = &getRegisterInfo(); @@ -883,8 +886,8 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (PPC::CRBITRCRegClass.contains(SrcReg) && PPC::GPRCRegClass.contains(DestReg)) { unsigned CRReg = getCRFromCRBit(SrcReg); - BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg) - .addReg(CRReg), getKillRegState(KillSrc); + BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg); + getKillRegState(KillSrc); // Rotate the CR bit in the CR fields to be the least significant bit and // then mask with 0x1 (MB = ME = 31). 
BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg) @@ -895,13 +898,13 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } else if (PPC::CRRCRegClass.contains(SrcReg) && PPC::G8RCRegClass.contains(DestReg)) { - BuildMI(MBB, I, DL, get(PPC::MFOCRF8), DestReg) - .addReg(SrcReg), getKillRegState(KillSrc); + BuildMI(MBB, I, DL, get(PPC::MFOCRF8), DestReg).addReg(SrcReg); + getKillRegState(KillSrc); return; } else if (PPC::CRRCRegClass.contains(SrcReg) && PPC::GPRCRegClass.contains(DestReg)) { - BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg) - .addReg(SrcReg), getKillRegState(KillSrc); + BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(SrcReg); + getKillRegState(KillSrc); return; } @@ -1085,12 +1088,11 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, NewMIs.back()->addMemOperand(MF, MMO); } -bool -PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs, - bool &NonRI, bool &SpillsVRS) const{ +bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr *> &NewMIs, + bool &NonRI, bool &SpillsVRS) const { // Note: If additional load instructions are added here, // update isLoadFromStackSlot. @@ -1208,35 +1210,35 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { return false; } -bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, - unsigned Reg, MachineRegisterInfo *MRI) const { +bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, + unsigned Reg, MachineRegisterInfo *MRI) const { // For some instructions, it is legal to fold ZERO into the RA register field. // A zero immediate should always be loaded with a single li. - unsigned DefOpc = DefMI->getOpcode(); + unsigned DefOpc = DefMI.getOpcode(); if (DefOpc != PPC::LI && DefOpc != PPC::LI8) return false; - if (!DefMI->getOperand(1).isImm()) + if (!DefMI.getOperand(1).isImm()) return false; - if (DefMI->getOperand(1).getImm() != 0) + if (DefMI.getOperand(1).getImm() != 0) return false; // Note that we cannot here invert the arguments of an isel in order to fold // a ZERO into what is presented as the second argument. All we have here // is the condition bit, and that might come from a CR-logical bit operation. - const MCInstrDesc &UseMCID = UseMI->getDesc(); + const MCInstrDesc &UseMCID = UseMI.getDesc(); // Only fold into real machine instructions. 
if (UseMCID.isPseudo()) return false; unsigned UseIdx; - for (UseIdx = 0; UseIdx < UseMI->getNumOperands(); ++UseIdx) - if (UseMI->getOperand(UseIdx).isReg() && - UseMI->getOperand(UseIdx).getReg() == Reg) + for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx) + if (UseMI.getOperand(UseIdx).isReg() && + UseMI.getOperand(UseIdx).getReg() == Reg) break; - assert(UseIdx < UseMI->getNumOperands() && "Cannot find Reg in UseMI"); + assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI"); assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg"); const MCOperandInfo *UseInfo = &UseMCID.OpInfo[UseIdx]; @@ -1268,10 +1270,10 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, } bool DeleteDef = MRI->hasOneNonDBGUse(Reg); - UseMI->getOperand(UseIdx).setReg(ZeroReg); + UseMI.getOperand(UseIdx).setReg(ZeroReg); if (DeleteDef) - DefMI->eraseFromParent(); + DefMI.eraseFromParent(); return true; } @@ -1299,7 +1301,7 @@ bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, } -bool PPCInstrInfo::isPredicated(const MachineInstr *MI) const { +bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const { // The predicated branches are identified by their type, not really by the // explicit presence of a predicate. Furthermore, some of them can be // predicated more than once. Because if conversion won't try to predicate @@ -1310,73 +1312,71 @@ bool PPCInstrInfo::isPredicated(const MachineInstr *MI) const { return false; } -bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - if (!MI->isTerminator()) +bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { + if (!MI.isTerminator()) return false; // Conditional branch is a special case. - if (MI->isBranch() && !MI->isBarrier()) + if (MI.isBranch() && !MI.isBarrier()) return true; return !isPredicated(MI); } -bool PPCInstrInfo::PredicateInstruction(MachineInstr *MI, +bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, ArrayRef<MachineOperand> Pred) const { - unsigned OpC = MI->getOpcode(); + unsigned OpC = MI.getOpcode(); if (OpC == PPC::BLR || OpC == PPC::BLR8) { if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) { bool isPPC64 = Subtarget.isPPC64(); - MI->setDesc(get(Pred[0].getImm() ? - (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) : - (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR))); + MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) + : (isPPC64 ? 
PPC::BDZLR8 : PPC::BDZLR))); } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) { - MI->setDesc(get(PPC::BCLR)); - MachineInstrBuilder(*MI->getParent()->getParent(), MI) - .addReg(Pred[1].getReg()); + MI.setDesc(get(PPC::BCLR)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addReg(Pred[1].getReg()); } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { - MI->setDesc(get(PPC::BCLRn)); - MachineInstrBuilder(*MI->getParent()->getParent(), MI) - .addReg(Pred[1].getReg()); + MI.setDesc(get(PPC::BCLRn)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addReg(Pred[1].getReg()); } else { - MI->setDesc(get(PPC::BCCLR)); - MachineInstrBuilder(*MI->getParent()->getParent(), MI) - .addImm(Pred[0].getImm()) - .addReg(Pred[1].getReg()); + MI.setDesc(get(PPC::BCCLR)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(Pred[0].getImm()) + .addReg(Pred[1].getReg()); } return true; } else if (OpC == PPC::B) { if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) { bool isPPC64 = Subtarget.isPPC64(); - MI->setDesc(get(Pred[0].getImm() ? - (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : - (isPPC64 ? PPC::BDZ8 : PPC::BDZ))); + MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) + : (isPPC64 ? PPC::BDZ8 : PPC::BDZ))); } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) { - MachineBasicBlock *MBB = MI->getOperand(0).getMBB(); - MI->RemoveOperand(0); + MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); + MI.RemoveOperand(0); - MI->setDesc(get(PPC::BC)); - MachineInstrBuilder(*MI->getParent()->getParent(), MI) - .addReg(Pred[1].getReg()) - .addMBB(MBB); + MI.setDesc(get(PPC::BC)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addReg(Pred[1].getReg()) + .addMBB(MBB); } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { - MachineBasicBlock *MBB = MI->getOperand(0).getMBB(); - MI->RemoveOperand(0); + MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); + MI.RemoveOperand(0); - MI->setDesc(get(PPC::BCn)); - MachineInstrBuilder(*MI->getParent()->getParent(), MI) - .addReg(Pred[1].getReg()) - .addMBB(MBB); + MI.setDesc(get(PPC::BCn)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addReg(Pred[1].getReg()) + .addMBB(MBB); } else { - MachineBasicBlock *MBB = MI->getOperand(0).getMBB(); - MI->RemoveOperand(0); - - MI->setDesc(get(PPC::BCC)); - MachineInstrBuilder(*MI->getParent()->getParent(), MI) - .addImm(Pred[0].getImm()) - .addReg(Pred[1].getReg()) - .addMBB(MBB); + MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); + MI.RemoveOperand(0); + + MI.setDesc(get(PPC::BCC)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(Pred[0].getImm()) + .addReg(Pred[1].getReg()) + .addMBB(MBB); } return true; @@ -1389,24 +1389,24 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr *MI, bool isPPC64 = Subtarget.isPPC64(); if (Pred[0].getImm() == PPC::PRED_BIT_SET) { - MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) : - (setLR ? PPC::BCCTRL : PPC::BCCTR))); - MachineInstrBuilder(*MI->getParent()->getParent(), MI) - .addReg(Pred[1].getReg()); + MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) + : (setLR ? PPC::BCCTRL : PPC::BCCTR))); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addReg(Pred[1].getReg()); return true; } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { - MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n) : - (setLR ? PPC::BCCTRLn : PPC::BCCTRn))); - MachineInstrBuilder(*MI->getParent()->getParent(), MI) - .addReg(Pred[1].getReg()); + MI.setDesc(get(isPPC64 ? 
(setLR ? PPC::BCCTRL8n : PPC::BCCTR8n) + : (setLR ? PPC::BCCTRLn : PPC::BCCTRn))); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addReg(Pred[1].getReg()); return true; } - MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8) : - (setLR ? PPC::BCCCTRL : PPC::BCCCTR))); - MachineInstrBuilder(*MI->getParent()->getParent(), MI) - .addImm(Pred[0].getImm()) - .addReg(Pred[1].getReg()); + MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8) + : (setLR ? PPC::BCCCTRL : PPC::BCCCTR))); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(Pred[0].getImm()) + .addReg(Pred[1].getReg()); return true; } @@ -1444,7 +1444,7 @@ bool PPCInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, return false; } -bool PPCInstrInfo::DefinesPredicate(MachineInstr *MI, +bool PPCInstrInfo::DefinesPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred) const { // Note: At the present time, the contents of Pred from this function is // unused by IfConversion. This implementation follows ARM by pushing the @@ -1457,8 +1457,8 @@ bool PPCInstrInfo::DefinesPredicate(MachineInstr *MI, &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass }; bool Found = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); for (unsigned c = 0; c < array_lengthof(RCs) && !Found; ++c) { const TargetRegisterClass *RC = RCs[c]; if (MO.isReg()) { @@ -1480,8 +1480,8 @@ bool PPCInstrInfo::DefinesPredicate(MachineInstr *MI, return Found; } -bool PPCInstrInfo::isPredicable(MachineInstr *MI) const { - unsigned OpC = MI->getOpcode(); +bool PPCInstrInfo::isPredicable(MachineInstr &MI) const { + unsigned OpC = MI.getOpcode(); switch (OpC) { default: return false; @@ -1496,10 +1496,10 @@ bool PPCInstrInfo::isPredicable(MachineInstr *MI) const { } } -bool PPCInstrInfo::analyzeCompare(const MachineInstr *MI, - unsigned &SrcReg, unsigned &SrcReg2, - int &Mask, int &Value) const { - unsigned Opc = MI->getOpcode(); +bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, + unsigned &SrcReg2, int &Mask, + int &Value) const { + unsigned Opc = MI.getOpcode(); switch (Opc) { default: return false; @@ -1507,9 +1507,9 @@ bool PPCInstrInfo::analyzeCompare(const MachineInstr *MI, case PPC::CMPLWI: case PPC::CMPDI: case PPC::CMPLDI: - SrcReg = MI->getOperand(1).getReg(); + SrcReg = MI.getOperand(1).getReg(); SrcReg2 = 0; - Value = MI->getOperand(2).getImm(); + Value = MI.getOperand(2).getImm(); Mask = 0xFFFF; return true; case PPC::CMPW: @@ -1518,21 +1518,20 @@ bool PPCInstrInfo::analyzeCompare(const MachineInstr *MI, case PPC::CMPLD: case PPC::FCMPUS: case PPC::FCMPUD: - SrcReg = MI->getOperand(1).getReg(); - SrcReg2 = MI->getOperand(2).getReg(); + SrcReg = MI.getOperand(1).getReg(); + SrcReg2 = MI.getOperand(2).getReg(); return true; } } -bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, - unsigned SrcReg, unsigned SrcReg2, - int Mask, int Value, +bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, + unsigned SrcReg2, int Mask, int Value, const MachineRegisterInfo *MRI) const { if (DisableCmpOpt) return false; - int OpC = CmpInstr->getOpcode(); - unsigned CRReg = CmpInstr->getOperand(0).getReg(); + int OpC = CmpInstr.getOpcode(); + unsigned CRReg = CmpInstr.getOperand(0).getReg(); // FP record forms set CR1 based on the execption status bits, not a // comparison with zero. 
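Between these two hunks, a note on the zero-extension condition the next hunk adds to optimizeCompareInstr: the rlwinm family rotates the low 32 bits and ANDs with the 64-bit mask MASK(MB+32, ME+32), so the result is guaranteed zero-extended exactly when MB <= ME, i.e. when the mask does not wrap into the high word. A self-contained sketch of that rule follows (illustration only; ppcMask is a hypothetical helper written for this note, not a function from the patch or from LLVM, using the Power ISA's bit-0-is-MSB numbering):

#include <cassert>
#include <cstdint>

// Hypothetical helper (not from the patch): MASK(B, E) per the Power ISA,
// with bit 0 as the most significant bit. B <= E gives a contiguous run
// of ones from bit B through bit E; B > E gives the wrapped complement.
uint64_t ppcMask(unsigned B, unsigned E) {
  uint64_t fromB = ~UINT64_C(0) >> B;         // Power bits B..63 set
  uint64_t uptoE = ~UINT64_C(0) << (63 - E);  // Power bits 0..E set
  return B <= E ? (fromB & uptoE) : (fromB | uptoE);
}

int main() {
  // rlwinm ANDs with MASK(MB+32, ME+32). MB = 5 <= ME = 20: the mask
  // stays in the low word, so the top 32 bits of the result are zero.
  assert(ppcMask(32 + 5, 32 + 20) >> 32 == 0);
  // MB = 20 > ME = 5: the mask wraps into the high word, so the result
  // is not guaranteed zero-extended.
  assert(ppcMask(32 + 20, 32 + 5) >> 32 != 0);
  return 0;
}

With MB > ME the wrapped mask can leave high bits set, so treating the record form as a zero-extended unsigned 32-bit compare would be unsound; the isZeroExtendingRotate check introduced below encodes precisely this.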
@@ -1571,11 +1570,18 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, } else return false; } else if (is32BitUnsignedCompare) { + // 32-bit rotate and mask instructions are zero extending only if MB <= ME + bool isZeroExtendingRotate = + (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINMo || + MIOpC == PPC::RLWNM || MIOpC == PPC::RLWNMo) + && MI->getOperand(3).getImm() <= MI->getOperand(4).getImm(); + // We can perform this optimization, equality only, if MI is // zero-extending. if (MIOpC == PPC::CNTLZW || MIOpC == PPC::CNTLZWo || MIOpC == PPC::SLW || MIOpC == PPC::SLWo || - MIOpC == PPC::SRW || MIOpC == PPC::SRWo) { + MIOpC == PPC::SRW || MIOpC == PPC::SRWo || + isZeroExtendingRotate) { noSub = true; equalityOnly = true; } else @@ -1608,8 +1614,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, MachineBasicBlock::iterator I = CmpInstr; // Scan forward to find the first use of the compare. - for (MachineBasicBlock::iterator EL = CmpInstr->getParent()->end(); - I != EL; ++I) { + for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL; + ++I) { bool FoundUse = false; for (MachineRegisterInfo::use_instr_iterator J =MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end(); J != JE; ++J) @@ -1633,7 +1639,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, // same BB as the comparison. This is to allow the check below to avoid calls // (and other explicit clobbers); instead we should really check for these // more explicitly (in at least a few predecessors). - else if (MI->getParent() != CmpInstr->getParent() || Value != 0) { + else if (MI->getParent() != CmpInstr.getParent() || Value != 0) { // PPC does not have a record-form SUBri. return false; } @@ -1643,16 +1649,14 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, --I; // Get ready to iterate backward from CmpInstr. - MachineBasicBlock::iterator E = MI, - B = CmpInstr->getParent()->begin(); + MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin(); for (; I != E && !noSub; --I) { const MachineInstr &Instr = *I; unsigned IOpC = Instr.getOpcode(); - if (&*I != CmpInstr && ( - Instr.modifiesRegister(PPC::CR0, TRI) || - Instr.readsRegister(PPC::CR0, TRI))) + if (&*I != &CmpInstr && (Instr.modifiesRegister(PPC::CR0, TRI) || + Instr.readsRegister(PPC::CR0, TRI))) // This instruction modifies or uses the record condition register after // the one we want to change. While we could do this transformation, it // would likely not be profitable. This transformation removes one @@ -1752,13 +1756,17 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, // Create a new virtual register to hold the value of the CR set by the // record-form instruction. If the instruction was not previously in // record form, then set the kill flag on the CR. - CmpInstr->eraseFromParent(); + CmpInstr.eraseFromParent(); MachineBasicBlock::iterator MII = MI; BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(), get(TargetOpcode::COPY), CRReg) .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0); + // Even if CR0 register were dead before, it is alive now since the + // instruction we just built uses it. 
+ MI->clearRegisterDeads(PPC::CR0); + if (MIOpC != NewOpC) { // We need to be careful here: we're replacing one instruction with // another, and we need to make sure that we get all of the right @@ -1783,6 +1791,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, MI->addOperand(*MI->getParent()->getParent(), MachineOperand::CreateReg(*ImpUses, false, true)); } + assert(MI->definesRegister(PPC::CR0) && + "Record-form instruction does not define cr0?"); // Modify the condition code of operands in OperandsToUpdate. // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to @@ -1799,17 +1809,17 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. /// -unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - unsigned Opcode = MI->getOpcode(); +unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); if (Opcode == PPC::INLINEASM) { - const MachineFunction *MF = MI->getParent()->getParent(); - const char *AsmStr = MI->getOperand(0).getSymbolName(); + const MachineFunction *MF = MI.getParent()->getParent(); + const char *AsmStr = MI.getOperand(0).getSymbolName(); return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); } else if (Opcode == TargetOpcode::STACKMAP) { - return MI->getOperand(1).getImm(); + return MI.getOperand(1).getImm(); } else if (Opcode == TargetOpcode::PATCHPOINT) { - PatchPointOpers Opers(MI); + PatchPointOpers Opers(&MI); return Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm(); } else { const MCInstrDesc &Desc = get(Opcode); @@ -1842,10 +1852,26 @@ ArrayRef<std::pair<unsigned, const char *>> PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { using namespace PPCII; static const std::pair<unsigned, const char *> TargetFlags[] = { - {MO_PLT_OR_STUB, "ppc-plt-or-stub"}, + {MO_PLT, "ppc-plt"}, {MO_PIC_FLAG, "ppc-pic"}, {MO_NLP_FLAG, "ppc-nlp"}, {MO_NLP_HIDDEN_FLAG, "ppc-nlp-hidden"}}; return makeArrayRef(TargetFlags); } +bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + switch (MI.getOpcode()) { + case TargetOpcode::LOAD_STACK_GUARD: { + assert(Subtarget.isTargetLinux() && + "Only Linux target is expected to contain LOAD_STACK_GUARD"); + const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008; + const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2; + MI.setDesc(get(Subtarget.isPPC64() ? 
PPC::LD : PPC::LWZ)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(Offset) + .addReg(Reg); + return true; + } + } + return false; +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h index c3c3a480a6aa..98baf125bdff 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -73,10 +73,10 @@ class PPCInstrInfo : public PPCGenInstrInfo { const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs, bool &NonRI, bool &SpillsVRS) const; - bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, + bool LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs, + SmallVectorImpl<MachineInstr *> &NewMIs, bool &NonRI, bool &SpillsVRS) const; virtual void anchor(); @@ -91,8 +91,7 @@ protected: /// /// For example, we can commute rlwimi instructions, but only if the /// rotate amt is zero. We also have to munge the immediates a bit. - MachineInstr *commuteInstructionImpl(MachineInstr *MI, - bool NewMI, + MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override; @@ -113,12 +112,12 @@ public: const ScheduleDAG *DAG) const override; unsigned getInstrLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI, + const MachineInstr &MI, unsigned *PredCost = nullptr) const override; int getOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, + const MachineInstr &DefMI, unsigned DefIdx, + const MachineInstr &UseMI, unsigned UseIdx) const override; int getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode, unsigned DefIdx, @@ -128,7 +127,7 @@ public: } bool hasLowDefLatency(const TargetSchedModel &SchedModel, - const MachineInstr *DefMI, + const MachineInstr &DefMI, unsigned DefIdx) const override { // Machine LICM should hoist all instructions in low-register-pressure // situations; none are sufficiently free to justify leaving in a loop @@ -152,12 +151,12 @@ public: bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SubIdx) const override; - unsigned isLoadFromStackSlot(const MachineInstr *MI, + unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; - unsigned isStoreToStackSlot(const MachineInstr *MI, + unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; - bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, + bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; void insertNoop(MachineBasicBlock &MBB, @@ -165,25 +164,25 @@ public: // Branch analysis. - bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const override; unsigned RemoveBranch(MachineBasicBlock &MBB) const override; unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, - DebugLoc DL) const override; + const DebugLoc &DL) const override; // Select analysis. 
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond, unsigned, unsigned, int &, int &, int &) const override; void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - DebugLoc DL, unsigned DstReg, ArrayRef<MachineOperand> Cond, - unsigned TrueReg, unsigned FalseReg) const override; + const DebugLoc &DL, unsigned DstReg, + ArrayRef<MachineOperand> Cond, unsigned TrueReg, + unsigned FalseReg) const override; - void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, @@ -201,8 +200,8 @@ public: bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; - bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, - unsigned Reg, MachineRegisterInfo *MRI) const override; + bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, + MachineRegisterInfo *MRI) const override; // If conversion by predication (only supported by some branch instructions). // All of the profitability checks always return true; it is always @@ -230,37 +229,34 @@ public: } // Predication support. - bool isPredicated(const MachineInstr *MI) const override; + bool isPredicated(const MachineInstr &MI) const override; - bool isUnpredicatedTerminator(const MachineInstr *MI) const override; + bool isUnpredicatedTerminator(const MachineInstr &MI) const override; - bool PredicateInstruction(MachineInstr *MI, + bool PredicateInstruction(MachineInstr &MI, ArrayRef<MachineOperand> Pred) const override; bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1, ArrayRef<MachineOperand> Pred2) const override; - bool DefinesPredicate(MachineInstr *MI, + bool DefinesPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred) const override; - bool isPredicable(MachineInstr *MI) const override; + bool isPredicable(MachineInstr &MI) const override; // Comparison optimization. + bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, + unsigned &SrcReg2, int &Mask, int &Value) const override; - bool analyzeCompare(const MachineInstr *MI, - unsigned &SrcReg, unsigned &SrcReg2, - int &Mask, int &Value) const override; - - bool optimizeCompareInstr(MachineInstr *CmpInstr, - unsigned SrcReg, unsigned SrcReg2, - int Mask, int Value, + bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, + unsigned SrcReg2, int Mask, int Value, const MachineRegisterInfo *MRI) const override; /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. /// - unsigned GetInstSizeInBytes(const MachineInstr *MI) const; + unsigned GetInstSizeInBytes(const MachineInstr &MI) const; void getNoopForMachoTarget(MCInst &NopInst) const override; @@ -272,6 +268,9 @@ public: ArrayRef<std::pair<unsigned, const char *>> getSerializableBitmaskMachineOperandTargetFlags() const override; + + // Lower pseudo instructions after register allocation. 
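+  // (This hook is called by the generic post-RA pseudo-expansion pass once
+  // per instruction; returning true means the pseudo was rewritten in
+  // place, as the LOAD_STACK_GUARD expansion in PPCInstrInfo.cpp does.)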
+ bool expandPostRAPseudo(MachineInstr &MI) const override; }; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td index ce0f9e6f52a7..4a42a947c6cb 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -31,6 +31,18 @@ def SDT_PPCvperm : SDTypeProfile<1, 3, [ SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2> ]>; +def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>, + SDTCisVec<1>, SDTCisInt<2> +]>; + +def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>, + SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> +]>; + +def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>, + SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> +]>; + def SDT_PPCvcmp : SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32> ]>; @@ -140,7 +152,10 @@ def PPCaddiTlsldLAddr : SDNode<"PPCISD::ADDI_TLSLD_L_ADDR", def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>; def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; -def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; +def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; +def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; +def PPCxxinsert : SDNode<"PPCISD::XXINSERT", SDT_PPCVecInsert, []>; +def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>; def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>; def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>; @@ -257,7 +272,7 @@ def HI16 : SDNodeXForm<imm, [{ def HA16 : SDNodeXForm<imm, [{ // Transformation function: shift the immediate value down into the low bits. - signed int Val = N->getZExtValue(); + int Val = N->getZExtValue(); return getI32Imm((Val - (signed short)Val) >> 16, SDLoc(N)); }]>; def MB : SDNodeXForm<imm, [{ @@ -507,6 +522,24 @@ def u6imm : Operand<i32> { let ParserMatchClass = PPCU6ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<6>"; } +def PPCU7ImmAsmOperand : AsmOperandClass { + let Name = "U7Imm"; let PredicateMethod = "isU7Imm"; + let RenderMethod = "addImmOperands"; +} +def u7imm : Operand<i32> { + let PrintMethod = "printU7ImmOperand"; + let ParserMatchClass = PPCU7ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<7>"; +} +def PPCU8ImmAsmOperand : AsmOperandClass { + let Name = "U8Imm"; let PredicateMethod = "isU8Imm"; + let RenderMethod = "addImmOperands"; +} +def u8imm : Operand<i32> { + let PrintMethod = "printU8ImmOperand"; + let ParserMatchClass = PPCU8ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<8>"; +} def PPCU10ImmAsmOperand : AsmOperandClass { let Name = "U10Imm"; let PredicateMethod = "isU10Imm"; let RenderMethod = "addImmOperands"; @@ -635,6 +668,13 @@ def PPCDispRIXOperand : AsmOperandClass { def dispRIX : Operand<iPTR> { let ParserMatchClass = PPCDispRIXOperand; } +def PPCDispRIX16Operand : AsmOperandClass { + let Name = "DispRIX16"; let PredicateMethod = "isS16ImmX16"; + let RenderMethod = "addImmOperands"; +} +def dispRIX16 : Operand<iPTR> { + let ParserMatchClass = PPCDispRIX16Operand; +} def PPCDispSPE8Operand : AsmOperandClass { let Name = "DispSPE8"; let PredicateMethod = "isU8ImmX8"; let RenderMethod = "addImmOperands"; @@ -673,6 +713,12 @@ def memrix : Operand<iPTR> { // memri where the imm is 4-aligned. 
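  // (A hedged summary: memrix is the DS-form memory operand, so its
  // 4-aligned displacement is encoded as imm>>2 in a 14-bit field; the
  // memrix16 operand added below plays the same role for the DQ-form,
  // 16-aligned displacements used by lxv/stxv.)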
let EncoderMethod = "getMemRIXEncoding"; let DecoderMethod = "decodeMemRIXOperands"; } +def memrix16 : Operand<iPTR> { // memri, imm is 16-aligned, 12-bit, Inst{16:27} + let PrintMethod = "printMemRegImm"; + let MIOperandInfo = (ops dispRIX16:$imm, ptr_rc_nor0:$reg); + let EncoderMethod = "getMemRIX16Encoding"; + let DecoderMethod = "decodeMemRIX16Operands"; +} def spe8dis : Operand<iPTR> { // SPE displacement where the imm is 8-aligned. let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispSPE8:$imm, ptr_rc_nor0:$reg); @@ -746,6 +792,7 @@ def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">; def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">; def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">; +def IsISA3_0 : Predicate<"PPCSubTarget->isISA3_0()">; //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. @@ -1365,7 +1412,10 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { Requires<[In32BitMode]>; } -let isBranch = 1, isTerminator = 1 in { +// This pseudo is never removed from the function, as it serves as +// a terminator. Size is set to 0 to prevent the builtin assembler +// from emitting it. +let isBranch = 1, isTerminator = 1, Size = 0 in { def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst), "#EH_SjLj_Setup\t$dst", []>; } @@ -1543,6 +1593,13 @@ def LHARXL : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src), def LWARXL : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src), "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT; + +// The atomic instructions use the destination register as well as the next one +// or two registers in order (modulo 31). +let hasExtraSrcRegAllocReq = 1 in +def LWAT : X_RD5_RS5_IM5<31, 582, (outs gprc:$rD), (ins gprc:$rA, u5imm:$FC), + "lwat $rD, $rA, $FC", IIC_LdStLoad>, + Requires<[IsISA3_0]>; } let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in { @@ -1558,6 +1615,11 @@ def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst), "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT; } +let mayStore = 1, hasSideEffects = 0 in +def STWAT : X_RD5_RS5_IM5<31, 710, (outs), (ins gprc:$rS, gprc:$rA, u5imm:$FC), + "stwat $rS, $rA, $FC", IIC_LdStStore>, + Requires<[IsISA3_0]>; + let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>; @@ -1947,6 +2009,10 @@ let isCompare = 1, hasSideEffects = 0 in { "cmpwi $crD, $rA, $imm", IIC_IntCompare>; def CMPLWI : DForm_6_ext<10, (outs crrc:$dst), (ins gprc:$src1, u16imm:$src2), "cmplwi $dst, $src1, $src2", IIC_IntCompare>; + def CMPRB : X_BF3_L1_RS5_RS5<31, 192, (outs crbitrc:$BF), + (ins u1imm:$L, g8rc:$rA, g8rc:$rB), + "cmprb $BF, $L, $rA, $rB", IIC_IntCompare, []>, + Requires<[IsISA3_0]>; } } @@ -2000,6 +2066,9 @@ defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH), defm CNTLZW : XForm_11r<31, 26, (outs gprc:$rA), (ins gprc:$rS), "cntlzw", "$rA, $rS", IIC_IntGeneral, [(set i32:$rA, (ctlz i32:$rS))]>; +defm CNTTZW : XForm_11r<31, 538, (outs gprc:$rA), (ins gprc:$rS), + "cnttzw", "$rA, $rS", IIC_IntGeneral, + [(set i32:$rA, (cttz i32:$rS))]>, Requires<[IsISA3_0]>; defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS), "extsb", "$rA, $rS", IIC_IntSimple, [(set i32:$rA, (sext_inreg i32:$rS, i8))]>; @@ -2286,6 +2355,10 @@ let isCodeGenOnly = 1 in { PPC970_DGroup_First, PPC970_Unit_FXU; } +// Aliases for mtvrsave/mfvrsave to mfspr/mtspr. 
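+// (VRSAVE is SPR 256, so e.g. "mtvrsave r3" produces the same encoding as
+// "mtspr 256, r3"; the aliases below merely teach the assembler the legacy
+// mnemonics.)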
+def : InstAlias<"mtvrsave $rS", (MTVRSAVE gprc:$rS)>; +def : InstAlias<"mfvrsave $rS", (MFVRSAVE gprc:$rS)>; + // SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register, // so we'll need to scavenge a register for it. let mayStore = 1 in @@ -2328,6 +2401,9 @@ def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins), "mfcr $rT", IIC_SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; } // hasExtraSrcRegAllocReq = 1 + +def MCRXRX : X_BF3<31, 576, (outs crrc:$BF), (ins), + "mcrxrx $BF", IIC_BrMCRX>, Requires<[IsISA3_0]>; } // hasSideEffects = 0 // Pseudo instruction to perform FADD in round-to-zero mode. @@ -4138,3 +4214,33 @@ def : Pat<(atomic_store_32 iaddr:$ptr, i32:$val), (STW gprc:$val, memri:$ptr)>; def : Pat<(atomic_store_8 xaddr:$ptr, i32:$val), (STBX gprc:$val, memrr:$ptr)>; def : Pat<(atomic_store_16 xaddr:$ptr, i32:$val), (STHX gprc:$val, memrr:$ptr)>; def : Pat<(atomic_store_32 xaddr:$ptr, i32:$val), (STWX gprc:$val, memrr:$ptr)>; + +let Predicates = [IsISA3_0] in { + +// Copy-Paste Facility +// We prefix 'CP' to COPY due to name conflict in Target.td. We also prefix to +// PASTE for naming consistency. +let mayLoad = 1 in +def CP_COPY : X_L1_RA5_RB5<31, 774, "copy" , gprc, IIC_LdStCOPY, []>; + +let mayStore = 1 in +def CP_PASTE : X_L1_RA5_RB5<31, 902, "paste" , gprc, IIC_LdStPASTE, []>; + +let mayStore = 1, Defs = [CR0] in +def CP_PASTEo : X_L1_RA5_RB5<31, 902, "paste.", gprc, IIC_LdStPASTE, []>, isDOT; + +def CP_COPYx : PPCAsmPseudo<"copy $rA, $rB" , (ins gprc:$rA, gprc:$rB)>; +def CP_PASTEx : PPCAsmPseudo<"paste $rA, $rB", (ins gprc:$rA, gprc:$rB)>; +def CP_COPY_FIRST : PPCAsmPseudo<"copy_first $rA, $rB", + (ins gprc:$rA, gprc:$rB)>; +def CP_PASTE_LAST : PPCAsmPseudo<"paste_last $rA, $rB", + (ins gprc:$rA, gprc:$rB)>; +def CP_ABORT : XForm_0<31, 838, (outs), (ins), "cp_abort", IIC_SprABORT, []>; + +// Message Synchronize +def MSGSYNC : XForm_0<31, 886, (outs), (ins), "msgsync", IIC_SprMSGSYNC, []>; + +// Power-Saving Mode Instruction: +def STOP : XForm_0<19, 370, (outs), (ins), "stop", IIC_SprSTOP, []>; + +} // IsISA3_0 diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td index df1142cb42f3..a02ace00a76f 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -57,6 +57,9 @@ def SDT_PPCstxvd2x : SDTypeProfile<0, 2, [ def SDT_PPCxxswapd : SDTypeProfile<1, 1, [ SDTCisSameAs<0, 1> ]>; +def SDTVecConv : SDTypeProfile<1, 2, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2> +]>; def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, [SDNPHasChain, SDNPMayLoad]>; @@ -66,6 +69,9 @@ def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>; def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>; def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>; +def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>; +def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>; +def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>; multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase, string asmstr, InstrItinClass itin, Intrinsic Int, @@ -608,7 +614,8 @@ let Uses = [RM] in { "xvcvsxwdp $XT, $XB", IIC_VecFP, []>; def XVCVSXWSP : XX2Form<60, 184, (outs vsrc:$XT), (ins vsrc:$XB), - "xvcvsxwsp $XT, $XB", IIC_VecFP, []>; + "xvcvsxwsp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (sint_to_fp v4i32:$XB))]>; def XVCVUXDDP : XX2Form<60, 488, (outs vsrc:$XT), (ins vsrc:$XB), 
"xvcvuxddp $XT, $XB", IIC_VecFP, @@ -772,10 +779,14 @@ let Uses = [RM] in { def XXSLDWI : XX3Form_2<60, 2, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW), - "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, []>; + "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, + [(set v4i32:$XT, (PPCvecshl v4i32:$XA, v4i32:$XB, + imm32SExt16:$SHW))]>; def XXSPLTW : XX2Form_2<60, 164, (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM), - "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; + "xxspltw $XT, $XB, $UIM", IIC_VecPerm, + [(set v4i32:$XT, + (PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>; } // hasSideEffects // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after @@ -926,6 +937,16 @@ def : Pat<(sext_inreg v2i64:$C, v2i32), def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))), (XVCVSXWDP (XXSLDWI $C, $C, 1))>; +def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)), + (v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>; +def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 1)), + (v2f64 (XVCVSXWDP (v2i64 (XXMRGLW $C, $C))))>; + +def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)), + (v2f64 (XVCVUXWDP (v2i64 (XXMRGHW $C, $C))))>; +def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)), + (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>; + // Loads. def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; @@ -945,6 +966,7 @@ def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>; +def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>; // Selects. def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)), @@ -1007,6 +1029,28 @@ def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A), def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A), (XVRSQRTEDP $A)>; +let Predicates = [IsLittleEndian] in { +def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), + (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; +def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), + (f64 (XSCVSXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; +def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), + (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; +def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), + (f64 (XSCVUXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; +} // IsLittleEndian + +let Predicates = [IsBigEndian] in { +def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), + (f64 (XSCVSXDDP (COPY_TO_REGCLASS $S, VSFRC)))>; +def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), + (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; +def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), + (f64 (XSCVUXDDP (COPY_TO_REGCLASS $S, VSFRC)))>; +def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), + (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; +} // IsBigEndian + } // AddedComplexity } // HasVSX @@ -1213,10 +1257,31 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB), "xscvspdpn $XT, $XB", IIC_VecFP, []>; + let Predicates = [IsLittleEndian] in { + def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), + (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; + def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), + (f32 (XSCVSXDSP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; + def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), + (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; + def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), + (f32 (XSCVUXDSP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; + } + + let Predicates = [IsBigEndian] in { + def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), + (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>; + def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), + (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; + def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), + (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S, VSFRC)))>; + def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), + (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; + } } // AddedComplexity = 400 } // HasP8Vector -let Predicates = [HasDirectMove, HasVSX] in { +let Predicates = [HasDirectMove] in { // VSX direct move instructions def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT), "mfvsrd $rA, $XT", IIC_VecGeneral, @@ -1235,7 +1300,22 @@ let Predicates = [HasDirectMove, HasVSX] in { def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA), "mtvsrwz $XT, $rA", IIC_VecGeneral, [(set f64:$XT, (PPCmtvsrz i32:$rA))]>; -} // HasDirectMove, HasVSX +} // HasDirectMove + +let Predicates = [IsISA3_0, HasDirectMove] in { + def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA), + "mtvsrws $XT, $rA", IIC_VecGeneral, + []>; + + def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB), + "mtvsrdd $XT, $rA, $rB", IIC_VecGeneral, + []>, Requires<[In64BitMode]>; + + def MFVSRLD: XX1_RS6_RD5_XO<31, 307, (outs g8rc:$rA), (ins vsrc:$XT), + "mfvsrld $rA, $XT", IIC_VecGeneral, + []>, Requires<[In64BitMode]>; + +} // IsISA3_0, HasDirectMove /* Direct moves of various widths from GPR's into VSR's. Each move lines the value up into element 0 (both BE and LE). 
Namely, entities smaller than @@ -1285,7 +1365,7 @@ def VectorExtractions { (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); // Word extraction - dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 2), sub_64)); + dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64)); dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64)); dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); @@ -1555,7 +1635,7 @@ let Predicates = [IsBigEndian, HasP8Vector] in { def : Pat<(f32 (vector_extract v4f32:$S, 1)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; def : Pat<(f32 (vector_extract v4f32:$S, 2)), - (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>; + (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>; def : Pat<(f32 (vector_extract v4f32:$S, 3)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), @@ -1660,7 +1740,7 @@ let Predicates = [IsLittleEndian, HasP8Vector] in { def : Pat<(f32 (vector_extract v4f32:$S, 0)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; def : Pat<(f32 (vector_extract v4f32:$S, 1)), - (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>; + (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>; def : Pat<(f32 (vector_extract v4f32:$S, 2)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; def : Pat<(f32 (vector_extract v4f32:$S, 3)), @@ -1783,3 +1863,423 @@ def : Pat<(i64 (bitconvert f64:$S)), def : Pat<(f64 (bitconvert i64:$S)), (f64 (MTVSRD $S))>; } + +def AlignValues { + dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3)); + dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC); +} + +// The following VSX instructions were introduced in Power ISA 3.0 +def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">; +let AddedComplexity = 400, Predicates = [HasP9Vector] in { + + // [PO VRT XO VRB XO /] + class X_VT5_XO5_VB5<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc, + list<dag> pattern> + : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vrrc:$vB), + !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>; + + // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /] + class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc, + list<dag> pattern> + : X_VT5_XO5_VB5<opcode, xo2, xo, opc, pattern>, isDOT; + + // [PO VRT XO VRB XO /], but the VRB is only used the left 64 bits (or less), + // So we use different operand class for VRB + class X_VT5_XO5_VB5_TyVB<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc, + RegisterOperand vbtype, list<dag> pattern> + : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vbtype:$vB), + !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>; + + // [PO T XO B XO BX /] + class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc, + list<dag> pattern> + : XX2_RD5_XO5_RS6<opcode, xo2, xo, (outs g8rc:$rT), (ins vsfrc:$XB), + !strconcat(opc, " $rT, $XB"), IIC_VecFP, pattern>; + + // [PO T XO B XO BX TX] + class XX2_XT6_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc, + RegisterOperand vtype, list<dag> pattern> + : XX2_RD6_XO5_RS6<opcode, xo2, xo, (outs vtype:$XT), (ins vtype:$XB), + !strconcat(opc, " $XT, $XB"), IIC_VecFP, pattern>; + + // [PO T A B XO AX BX TX], src and dest register use different operand class + class XX3_XT5_XA5_XB5<bits<6> opcode, bits<8> xo, string opc, + RegisterOperand xty, RegisterOperand aty, RegisterOperand bty, + InstrItinClass itin, list<dag> pattern> + : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB), + !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>; + + // [PO VRT VRA VRB XO /] + 
class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc, + list<dag> pattern> + : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vA, vrrc:$vB), + !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>; + + // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /] + class X_VT5_VA5_VB5_Ro<bits<6> opcode, bits<10> xo, string opc, + list<dag> pattern> + : X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isDOT; + + //===--------------------------------------------------------------------===// + // Quad-Precision Scalar Move Instructions: + + // Copy Sign + def XSCPSGNQP : X_VT5_VA5_VB5<63, 100, "xscpsgnqp", []>; + + // Absolute/Negative-Absolute/Negate + def XSABSQP : X_VT5_XO5_VB5<63, 0, 804, "xsabsqp" , []>; + def XSNABSQP : X_VT5_XO5_VB5<63, 8, 804, "xsnabsqp", []>; + def XSNEGQP : X_VT5_XO5_VB5<63, 16, 804, "xsnegqp" , []>; + + //===--------------------------------------------------------------------===// + // Quad-Precision Scalar Floating-Point Arithmetic Instructions: + + // Add/Divide/Multiply/Subtract + def XSADDQP : X_VT5_VA5_VB5 <63, 4, "xsaddqp" , []>; + def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo", []>; + def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp" , []>; + def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo", []>; + def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp" , []>; + def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo", []>; + def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" , []>; + def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo", []>; + + // Square-Root + def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp" , []>; + def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo", []>; + + // (Negative) Multiply-{Add/Subtract} + def XSMADDQP : X_VT5_VA5_VB5 <63, 388, "xsmaddqp" , []>; + def XSMADDQPO : X_VT5_VA5_VB5_Ro<63, 388, "xsmaddqpo" , []>; + def XSMSUBQP : X_VT5_VA5_VB5 <63, 420, "xsmsubqp" , []>; + def XSMSUBQPO : X_VT5_VA5_VB5_Ro<63, 420, "xsmsubqpo" , []>; + def XSNMADDQP : X_VT5_VA5_VB5 <63, 452, "xsnmaddqp" , []>; + def XSNMADDQPO: X_VT5_VA5_VB5_Ro<63, 452, "xsnmaddqpo", []>; + def XSNMSUBQP : X_VT5_VA5_VB5 <63, 484, "xsnmsubqp" , []>; + def XSNMSUBQPO: X_VT5_VA5_VB5_Ro<63, 484, "xsnmsubqpo", []>; + + //===--------------------------------------------------------------------===// + // Quad/Double-Precision Compare Instructions: + + // [PO BF // VRA VRB XO /] + class X_BF3_VA5_VB5<bits<6> opcode, bits<10> xo, string opc, + list<dag> pattern> + : XForm_17<opcode, xo, (outs crrc:$crD), (ins vrrc:$VA, vrrc:$VB), + !strconcat(opc, " $crD, $VA, $VB"), IIC_FPCompare> { + let Pattern = pattern; + } + + // QP Compare Ordered/Unordered + def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>; + def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>; + + // DP/QP Compare Exponents + def XSCMPEXPDP : XX3Form_1<60, 59, + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), + "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>; + def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>; + + // DP Compare ==, >=, >, != + // Use vsrc for XT, because the entire register of XT is set. 
+ // XT.dword[1] = 0x0000_0000_0000_0000 + def XSCMPEQDP : XX3_XT5_XA5_XB5<60, 3, "xscmpeqdp", vsrc, vsfrc, vsfrc, + IIC_FPCompare, []>; + def XSCMPGEDP : XX3_XT5_XA5_XB5<60, 19, "xscmpgedp", vsrc, vsfrc, vsfrc, + IIC_FPCompare, []>; + def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc, + IIC_FPCompare, []>; + def XSCMPNEDP : XX3_XT5_XA5_XB5<60, 27, "xscmpnedp", vsrc, vsfrc, vsfrc, + IIC_FPCompare, []>; + // Vector Compare Not Equal + def XVCMPNEDP : XX3Form_Rc<60, 123, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpnedp $XT, $XA, $XB", IIC_VecFPCompare, []>; + let Defs = [CR6] in + def XVCMPNEDPo : XX3Form_Rc<60, 123, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpnedp. $XT, $XA, $XB", IIC_VecFPCompare, []>, + isDOT; + def XVCMPNESP : XX3Form_Rc<60, 91, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpnesp $XT, $XA, $XB", IIC_VecFPCompare, []>; + let Defs = [CR6] in + def XVCMPNESPo : XX3Form_Rc<60, 91, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpnesp. $XT, $XA, $XB", IIC_VecFPCompare, []>, + isDOT; + + //===--------------------------------------------------------------------===// + // Quad-Precision Floating-Point Conversion Instructions: + + // Convert DP -> QP + def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vsfrc, []>; + + // Round & Convert QP -> DP (dword[1] is set to zero) + def XSCVQPDP : X_VT5_XO5_VB5 <63, 20, 836, "xscvqpdp" , []>; + def XSCVQPDPO : X_VT5_XO5_VB5_Ro<63, 20, 836, "xscvqpdpo", []>; + + // Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero) + def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>; + def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz", []>; + def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>; + def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>; + + // Convert (Un)Signed DWord -> QP + def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vsfrc, []>; + def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vsfrc, []>; + + //===--------------------------------------------------------------------===// + // Round to Floating-Point Integer Instructions + + // (Round &) Convert DP <-> HP + // Note! xscvdphp's src and dest register both use the left 64 bits, so we use + // vsfrc for src and dest register. xscvhpdp's src only use the left 16 bits, + // but we still use vsfrc for it. 
+ def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>; + def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>; + + // Vector HP -> SP + def XVCVHPSP : XX2_XT6_XO5_XB6<60, 24, 475, "xvcvhpsp", vsrc, []>; + def XVCVSPHP : XX2_XT6_XO5_XB6<60, 25, 475, "xvcvsphp", vsrc, []>; + + class Z23_VT5_R1_VB5_RMC2_EX1<bits<6> opcode, bits<8> xo, bit ex, string opc, + list<dag> pattern> + : Z23Form_1<opcode, xo, + (outs vrrc:$vT), (ins u1imm:$r, vrrc:$vB, u2imm:$rmc), + !strconcat(opc, " $r, $vT, $vB, $rmc"), IIC_VecFP, pattern> { + let RC = ex; + } + + // Round to Quad-Precision Integer [with Inexact] + def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>; + def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>; + + // Round Quad-Precision to Double-Extended Precision (fp80) + def XSRQPXP : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>; + + //===--------------------------------------------------------------------===// + // Insert/Extract Instructions + + // Insert Exponent DP/QP + // XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU + def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB), + "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>; + // vB NOTE: only vB.dword[0] is used, that's why we don't use + // X_VT5_VA5_VB5 form + def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB), + "xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>; + + // Extract Exponent/Significand DP/QP + def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>; + def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>; + def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>; + def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>; + + // Vector Insert Word + // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB. 
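+  // (Hedged reading of the ISA 3.0 semantics: xxinsertw copies one word
+  // from XB into XT at byte offset UIM and leaves the remaining bytes of
+  // XT intact, which is why the def below ties $XTi to $XT and makes the
+  // destination a read-modify-write operand.)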
+ def XXINSERTW : + XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT), + (ins vsrc:$XTi, vsrc:$XB, u4imm:$UIM), + "xxinsertw $XT, $XB, $UIM", IIC_VecFP, + [(set v4i32:$XT, (PPCxxinsert v4i32:$XTi, v4i32:$XB, + imm32SExt16:$UIM))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">; + + // Vector Extract Unsigned Word + def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165, + (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM), + "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>; + + // Vector Insert Exponent DP/SP + def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc, + IIC_VecFP, []>; + def XVIEXPSP : XX3_XT5_XA5_XB5<60, 216, "xviexpsp", vsrc, vsrc, vsrc, + IIC_VecFP, []>; + + // Vector Extract Exponent/Significand DP/SP + def XVXEXPDP : XX2_XT6_XO5_XB6<60, 0, 475, "xvxexpdp", vsrc, []>; + def XVXEXPSP : XX2_XT6_XO5_XB6<60, 8, 475, "xvxexpsp", vsrc, []>; + def XVXSIGDP : XX2_XT6_XO5_XB6<60, 1, 475, "xvxsigdp", vsrc, []>; + def XVXSIGSP : XX2_XT6_XO5_XB6<60, 9, 475, "xvxsigsp", vsrc, []>; + + //===--------------------------------------------------------------------===// + + // Test Data Class SP/DP/QP + def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298, + (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB), + "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>; + def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362, + (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB), + "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>; + def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708, + (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB), + "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>; + + // Vector Test Data Class SP/DP + def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5, + (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), + "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP, []>; + def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5, + (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), + "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP, []>; + + //===--------------------------------------------------------------------===// + + // Maximum/Minimum Type-C/Type-J DP + // XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU, so we use vsrc for XT + def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsrc, vsfrc, vsfrc, + IIC_VecFP, []>; + def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc, + IIC_VecFP, []>; + def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsrc, vsfrc, vsfrc, + IIC_VecFP, []>; + def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc, + IIC_VecFP, []>; + + //===--------------------------------------------------------------------===// + + // Vector Byte-Reverse H/W/D/Q Word + def XXBRH : XX2_XT6_XO5_XB6<60, 7, 475, "xxbrh", vsrc, []>; + def XXBRW : XX2_XT6_XO5_XB6<60, 15, 475, "xxbrw", vsrc, []>; + def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc, []>; + def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>; + + // Vector Permute + def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc, + IIC_VecPerm, []>; + def XXPERMR : XX3_XT5_XA5_XB5<60, 58, "xxpermr", vsrc, vsrc, vsrc, + IIC_VecPerm, []>; + + // Vector Splat Immediate Byte + def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8), + "xxspltib $XT, $IMM8", IIC_VecPerm, []>; + + //===--------------------------------------------------------------------===// + // Vector/Scalar Load/Store Instructions + + let mayLoad = 1 in { + // Load Vector + def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src), + "lxv $XT, $src", IIC_LdStLFD, []>; + // Load DWord + def LXSD : DSForm_1<57, 2, (outs vrrc:$vD), (ins memrix:$src), + "lxsd $vD, $src", IIC_LdStLFD, []>; + // Load SP from src, convert 
it to DP, and place in dword[0] + def LXSSP : DSForm_1<57, 3, (outs vrrc:$vD), (ins memrix:$src), + "lxssp $vD, $src", IIC_LdStLFD, []>; + + // [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different + // "out" and "in" dag + class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc, + RegisterOperand vtype, list<dag> pattern> + : XX1Form<opcode, xo, (outs vtype:$XT), (ins memrr:$src), + !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>; + + // Load as Integer Byte/Halfword & Zero Indexed + def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc, []>; + def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc, []>; + + // Load Vector Halfword*8/Byte*16 Indexed + def LXVH8X : X_XT6_RA5_RB5<31, 812, "lxvh8x" , vsrc, []>; + def LXVB16X : X_XT6_RA5_RB5<31, 876, "lxvb16x", vsrc, []>; + + // Load Vector Indexed + def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc, []>; + + // Load Vector (Left-justified) with Length + def LXVL : X_XT6_RA5_RB5<31, 269, "lxvl" , vsrc, []>; + def LXVLL : X_XT6_RA5_RB5<31, 301, "lxvll" , vsrc, []>; + + // Load Vector Word & Splat Indexed + def LXVWSX : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>; + } // end mayLoad + + let mayStore = 1 in { + // Store Vector + def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst), + "stxv $XT, $dst", IIC_LdStSTFD, []>; + // Store DWord + def STXSD : DSForm_1<61, 2, (outs), (ins vrrc:$vS, memrix:$dst), + "stxsd $vS, $dst", IIC_LdStSTFD, []>; + // Convert DP of dword[0] to SP, and Store to dst + def STXSSP : DSForm_1<61, 3, (outs), (ins vrrc:$vS, memrix:$dst), + "stxssp $vS, $dst", IIC_LdStSTFD, []>; + + // [PO S RA RB XO SX] + class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc, + RegisterOperand vtype, list<dag> pattern> + : XX1Form<opcode, xo, (outs), (ins vtype:$XT, memrr:$dst), + !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>; + + // Store as Integer Byte/Halfword Indexed + def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc, []>; + def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc, []>; + + // Store Vector Halfword*8/Byte*16 Indexed + def STXVH8X : X_XS6_RA5_RB5<31, 940, "stxvh8x" , vsrc, []>; + def STXVB16X : X_XS6_RA5_RB5<31, 1004, "stxvb16x", vsrc, []>; + + // Store Vector Indexed + def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc, []>; + + // Store Vector (Left-justified) with Length + def STXVL : X_XS6_RA5_RB5<31, 397, "stxvl" , vsrc, []>; + def STXVLL : X_XS6_RA5_RB5<31, 429, "stxvll" , vsrc, []>; + } // end mayStore + + // Patterns for which instructions from ISA 3.0 are a better match + let Predicates = [IsLittleEndian, HasP9Vector] in { + def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>; + def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>; + def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>; + def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>; + def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>; + def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>; + def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>; + def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>; + 
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
+          (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
+  def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
+          (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
+  def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
+          (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
+  def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
+          (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+  } // IsLittleEndian, HasP9Vector
+
+  let Predicates = [IsBigEndian, HasP9Vector] in {
+  def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
+            (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
+  def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))),
+            (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
+  def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))),
+            (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
+  def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
+            (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
+  def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
+            (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+  def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
+            (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+  def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
+            (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+  def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
+            (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+  def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
+            (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+  def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
+            (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
+  def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
+            (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
+  def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
+            (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
+  } // IsBigEndian, HasP9Vector
+} // end HasP9Vector, AddedComplexity
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
deleted file mode 100644
index e3a35d5df358..000000000000
--- a/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
+++ /dev/null
@@ -1,233 +0,0 @@
-//===-------- PPCLoopDataPrefetch.cpp - Loop Data Prefetching Pass --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a Loop Data Prefetching Pass. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "ppc-loop-data-prefetch" -#include "PPC.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -using namespace llvm; - -// By default, we limit this to creating 16 PHIs (which is a little over half -// of the allocatable register set). -static cl::opt<bool> -PrefetchWrites("ppc-loop-prefetch-writes", cl::Hidden, cl::init(false), - cl::desc("Prefetch write addresses")); - -// This seems like a reasonable default for the BG/Q (this pass is enabled, by -// default, only on the BG/Q). -static cl::opt<unsigned> -PrefDist("ppc-loop-prefetch-distance", cl::Hidden, cl::init(300), - cl::desc("The loop prefetch distance")); - -static cl::opt<unsigned> -CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64), - cl::desc("The loop prefetch cache line size")); - -namespace llvm { - void initializePPCLoopDataPrefetchPass(PassRegistry&); -} - -namespace { - - class PPCLoopDataPrefetch : public FunctionPass { - public: - static char ID; // Pass ID, replacement for typeid - PPCLoopDataPrefetch() : FunctionPass(ID) { - initializePPCLoopDataPrefetchPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addRequired<LoopInfoWrapperPass>(); - AU.addPreserved<LoopInfoWrapperPass>(); - AU.addRequired<ScalarEvolutionWrapperPass>(); - // FIXME: For some reason, preserving SE here breaks LSR (even if - // this pass changes nothing). 
- // AU.addPreserved<ScalarEvolutionWrapperPass>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - } - - bool runOnFunction(Function &F) override; - bool runOnLoop(Loop *L); - - private: - AssumptionCache *AC; - LoopInfo *LI; - ScalarEvolution *SE; - const TargetTransformInfo *TTI; - const DataLayout *DL; - }; -} - -char PPCLoopDataPrefetch::ID = 0; -INITIALIZE_PASS_BEGIN(PPCLoopDataPrefetch, "ppc-loop-data-prefetch", - "PPC Loop Data Prefetch", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) -INITIALIZE_PASS_END(PPCLoopDataPrefetch, "ppc-loop-data-prefetch", - "PPC Loop Data Prefetch", false, false) - -FunctionPass *llvm::createPPCLoopDataPrefetchPass() { return new PPCLoopDataPrefetch(); } - -bool PPCLoopDataPrefetch::runOnFunction(Function &F) { - LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - DL = &F.getParent()->getDataLayout(); - AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - - bool MadeChange = false; - - for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) - for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) - MadeChange |= runOnLoop(*L); - - return MadeChange; -} - -bool PPCLoopDataPrefetch::runOnLoop(Loop *L) { - bool MadeChange = false; - - // Only prefetch in the inner-most loop - if (!L->empty()) - return MadeChange; - - SmallPtrSet<const Value *, 32> EphValues; - CodeMetrics::collectEphemeralValues(L, AC, EphValues); - - // Calculate the number of iterations ahead to prefetch - CodeMetrics Metrics; - for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); - I != IE; ++I) { - - // If the loop already has prefetches, then assume that the user knows - // what he or she is doing and don't add any more. - for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); - J != JE; ++J) - if (CallInst *CI = dyn_cast<CallInst>(J)) - if (Function *F = CI->getCalledFunction()) - if (F->getIntrinsicID() == Intrinsic::prefetch) - return MadeChange; - - Metrics.analyzeBasicBlock(*I, *TTI, EphValues); - } - unsigned LoopSize = Metrics.NumInsts; - if (!LoopSize) - LoopSize = 1; - - unsigned ItersAhead = PrefDist/LoopSize; - if (!ItersAhead) - ItersAhead = 1; - - SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 16> PrefLoads; - for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); - I != IE; ++I) { - for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); - J != JE; ++J) { - Value *PtrValue; - Instruction *MemI; - - if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) { - MemI = LMemI; - PtrValue = LMemI->getPointerOperand(); - } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) { - if (!PrefetchWrites) continue; - MemI = SMemI; - PtrValue = SMemI->getPointerOperand(); - } else continue; - - unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); - if (PtrAddrSpace) - continue; - - if (L->isLoopInvariant(PtrValue)) - continue; - - const SCEV *LSCEV = SE->getSCEV(PtrValue); - const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV); - if (!LSCEVAddRec) - continue; - - // We don't want to double prefetch individual cache lines. If this load - // is known to be within one cache line of some other load that has - // already been prefetched, then don't prefetch this one as well. 
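-  // A worked example using the defaults declared above: with PrefDist = 300
-  // and a 25-instruction loop body, ItersAhead = 300/25 = 12, so each
-  // prefetch targets the address a load will touch 12 iterations ahead;
-  // the loop below then drops prefetches whose SCEV distance from an
-  // already-prefetched access is a constant below CacheLineSize (64 bytes).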
- bool DupPref = false; - for (SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, - 16>::iterator K = PrefLoads.begin(), KE = PrefLoads.end(); - K != KE; ++K) { - const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, K->second); - if (const SCEVConstant *ConstPtrDiff = - dyn_cast<SCEVConstant>(PtrDiff)) { - int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue()); - if (PD < (int64_t) CacheLineSize) { - DupPref = true; - break; - } - } - } - if (DupPref) - continue; - - const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr( - SE->getConstant(LSCEVAddRec->getType(), ItersAhead), - LSCEVAddRec->getStepRecurrence(*SE))); - if (!isSafeToExpand(NextLSCEV, *SE)) - continue; - - PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec)); - - Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace); - SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr"); - Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI); - - IRBuilder<> Builder(MemI); - Module *M = (*I)->getParent()->getParent(); - Type *I32 = Type::getInt32Ty((*I)->getContext()); - Value *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch); - Builder.CreateCall( - PrefetchFunc, - {PrefPtrValue, - ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1), - ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)}); - - MadeChange = true; - } - } - - return MadeChange; -} - diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index 5e188268fee9..48a71cfc2a6e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -144,6 +144,9 @@ static Value *GetPointerOperand(Value *MemI) { } bool PPCLoopPreIncPrep::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index 44a692d4bb42..18377a44a7f8 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -40,21 +40,15 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ Mangler *Mang = AP.Mang; const DataLayout &DL = AP.getDataLayout(); MCContext &Ctx = AP.OutContext; - bool isDarwin = TM.getTargetTriple().isOSDarwin(); SmallString<128> Name; StringRef Suffix; - if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB) { - if (isDarwin) - Suffix = "$stub"; - } else if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) + if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) Suffix = "$non_lazy_ptr"; if (!Suffix.empty()) Name += DL.getPrivateGlobalPrefix(); - unsigned PrefixLen = Name.size(); - if (!MO.isGlobal()) { assert(MO.isSymbol() && "Isn't a symbol reference"); Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL); @@ -63,42 +57,16 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ TM.getNameWithPrefix(Name, GV, *Mang); } - unsigned OrigLen = Name.size() - PrefixLen; - Name += Suffix; MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); - StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen); - - // If the target flags on the operand changes the name of the symbol, do that - // before we return the symbol. 
- if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && isDarwin) { - MachineModuleInfoImpl::StubValueTy &StubSym = - getMachOMMI(AP).getFnStubEntry(Sym); - if (StubSym.getPointer()) - return Sym; - - if (MO.isGlobal()) { - StubSym = - MachineModuleInfoImpl:: - StubValueTy(AP.getSymbol(MO.getGlobal()), - !MO.getGlobal()->hasInternalLinkage()); - } else { - StubSym = - MachineModuleInfoImpl:: - StubValueTy(Ctx.getOrCreateSymbol(OrigName), false); - } - return Sym; - } // If the symbol reference is actually to a non_lazy_ptr, not to the symbol, // then add the suffix. if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) { MachineModuleInfoMachO &MachO = getMachOMMI(AP); - - MachineModuleInfoImpl::StubValueTy &StubSym = - (MO.getTargetFlags() & PPCII::MO_NLP_HIDDEN_FLAG) ? - MachO.getHiddenGVStubEntry(Sym) : MachO.getGVStubEntry(Sym); - + + MachineModuleInfoImpl::StubValueTy &StubSym = MachO.getGVStubEntry(Sym); + if (!StubSym.getPointer()) { assert(MO.isGlobal() && "Extern symbol not handled yet"); StubSym = MachineModuleInfoImpl:: @@ -139,7 +107,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, break; } - if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin) + if (MO.getTargetFlags() == PPCII::MO_PLT) RefKind = MCSymbolRefExpr::VK_PLT; const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index fe339d70d7de..a57a83d7aa93 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -63,6 +63,8 @@ private: public: // Main entry point for this pass. bool runOnMachineFunction(MachineFunction &MF) override { + if (skipFunction(*MF.getFunction())) + return false; initialize(MF); return simplifyCode(); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 10a8ce068d40..4c29aa06f048 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -104,6 +104,10 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// Whether this uses the PIC Base register or not. bool UsesPICBase; + /// True if this function has a subset of CSRs that is handled explicitly via + /// copies + bool IsSplitCSR; + public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), @@ -125,7 +129,8 @@ public: VarArgsNumFPR(0), CRSpillFrameIndex(0), MF(MF), - UsesPICBase(0) {} + UsesPICBase(0), + IsSplitCSR(false) {} int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } @@ -196,6 +201,9 @@ public: void setUsesPICBase(bool uses) { UsesPICBase = uses; } bool usesPICBase() const { return UsesPICBase; } + bool isSplitCSR() const { return IsSplitCSR; } + void setIsSplitCSR(bool s) { IsSplitCSR = s; } + MCSymbol *getPICOffsetSymbol() const; MCSymbol *getGlobalEPSymbol() const; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/contrib/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp new file mode 100644 index 000000000000..bfe20c12974b --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp @@ -0,0 +1,166 @@ +//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The QPX vector registers overlay the scalar floating-point registers, and +// any scalar floating-point loads splat their value across all vector lanes. +// Thus, if we have a scalar load followed by a splat, we can remove the splat +// (i.e. replace the load with a load-and-splat pseudo instruction). +// +// This pass must run after anything that might do store-to-load forwarding. +// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "ppc-qpx-load-splat" + +STATISTIC(NumSimplified, "Number of QPX load splats simplified"); + +namespace llvm { + void initializePPCQPXLoadSplatPass(PassRegistry&); +} + +namespace { + struct PPCQPXLoadSplat : public MachineFunctionPass { + static char ID; + PPCQPXLoadSplat() : MachineFunctionPass(ID) { + initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &Fn) override; + + const char *getPassName() const override { + return "PowerPC QPX Load Splat Simplification"; + } + }; + char PPCQPXLoadSplat::ID = 0; +} + +INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat", + "PowerPC QPX Load Splat Simplification", + false, false) + +FunctionPass *llvm::createPPCQPXLoadSplatPass() { + return new PPCQPXLoadSplat(); +} + +bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + + bool MadeChange = false; + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + + for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) { + MachineBasicBlock *MBB = &*MFI; + SmallVector<MachineInstr *, 4> Splats; + + for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) { + MachineInstr *MI = &*MBBI; + + if (MI->hasUnmodeledSideEffects() || MI->isCall()) { + Splats.clear(); + continue; + } + + // We're looking for a sequence like this: + // %F0<def> = LFD 0, %X3<kill>, %QF0<imp-def>; mem:LD8[%a](tbaa=!2) + // %QF1<def> = QVESPLATI %QF0<kill>, 0, %RM<imp-use> + + for (auto SI = Splats.begin(); SI != Splats.end();) { + MachineInstr *SMI = *SI; + unsigned SplatReg = SMI->getOperand(0).getReg(); + unsigned SrcReg = SMI->getOperand(1).getReg(); + + if (MI->modifiesRegister(SrcReg, TRI)) { + switch (MI->getOpcode()) { + default: + SI = Splats.erase(SI); + continue; + case PPC::LFS: + case PPC::LFD: + case PPC::LFSU: + case PPC::LFDU: + case PPC::LFSUX: + case PPC::LFDUX: + case PPC::LFSX: + case PPC::LFDX: + case PPC::LFIWAX: + case PPC::LFIWZX: + if (SplatReg != SrcReg) { + // We need to change the load to define the scalar subregister of + // the QPX splat source register. + unsigned SubRegIndex = + TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg()); + unsigned SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex); + + // Substitute both the explicit defined register, and also the + // implicit def of the containing QPX register. 
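+          // Illustrative before/after (register names assumed, mirroring
+          // the sequence sketched earlier in this function):
+          //   %F2  = LFD 0, %X3, %QF2<imp-def>   ; scalar load
+          //   %QF1 = QVESPLATI %QF2<kill>, 0     ; redundant splat
+          // becomes, after the two statements below,
+          //   %F1  = LFD 0, %X3, %QF1<imp-def>
+          // and the QVESPLATI itself is erased further down.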
+ MI->getOperand(0).setReg(SplatSubReg); + MI->substituteRegister(SrcReg, SplatReg, 0, *TRI); + } + + SI = Splats.erase(SI); + + // If SMI is directly after MI, then MBBI's base iterator is + // pointing at SMI. Adjust MBBI around the call to erase SMI to + // avoid invalidating MBBI. + ++MBBI; + SMI->eraseFromParent(); + --MBBI; + + ++NumSimplified; + MadeChange = true; + continue; + } + } + + // If this instruction defines the splat register, then we cannot move + // the previous definition above it. If it reads from the splat + // register, then it must already be alive from some previous + // definition, and if the splat register is different from the source + // register, then this definition must not be the load for which we're + // searching. + if (MI->modifiesRegister(SplatReg, TRI) || + (SrcReg != SplatReg && + MI->readsRegister(SplatReg, TRI))) { + SI = Splats.erase(SI); + continue; + } + + ++SI; + } + + if (MI->getOpcode() != PPC::QVESPLATI && + MI->getOpcode() != PPC::QVESPLATIs && + MI->getOpcode() != PPC::QVESPLATIb) + continue; + if (MI->getOperand(2).getImm() != 0) + continue; + + // If there are other uses of the scalar value after this, replacing + // those uses might be non-trivial. + if (!MI->getOperand(1).isKill()) + continue; + + Splats.push_back(MI); + } + } + + return MadeChange; +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 934bdf622418..f0161a03d2d4 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -116,6 +116,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_SaveList : CSR_Darwin32_SaveList); + if (TM.isPPC64() && MF->getInfo<PPCFunctionInfo>()->isSplitCSR()) + return CSR_SRV464_TLS_PE_SaveList; + // On PPC64, we might need to save r2 (but only if it is not reserved). bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2); @@ -128,6 +131,31 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { : CSR_SVR432_SaveList); } +const MCPhysReg * +PPCRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const { + assert(MF && "Invalid MachineFunction pointer."); + const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>(); + if (Subtarget.isDarwinABI()) + return nullptr; + if (!TM.isPPC64()) + return nullptr; + if (MF->getFunction()->getCallingConv() != CallingConv::CXX_FAST_TLS) + return nullptr; + if (!MF->getInfo<PPCFunctionInfo>()->isSplitCSR()) + return nullptr; + + // On PPC64, we might need to save r2 (but only if it is not reserved). + bool SaveR2 = !getReservedRegs(*MF).test(PPC::X2); + if (Subtarget.hasAltivec()) + return SaveR2 + ? CSR_SVR464_R2_Altivec_ViaCopy_SaveList + : CSR_SVR464_Altivec_ViaCopy_SaveList; + else + return SaveR2 + ? 
CSR_SVR464_R2_ViaCopy_SaveList + : CSR_SVR464_ViaCopy_SaveList; +} + const uint32_t * PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { @@ -232,16 +260,15 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (TFI->needsFP(MF)) Reserved.set(PPC::R31); + bool IsPositionIndependent = TM.isPositionIndependent(); if (hasBasePointer(MF)) { - if (Subtarget.isSVR4ABI() && !TM.isPPC64() && - TM.getRelocationModel() == Reloc::PIC_) + if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent) Reserved.set(PPC::R29); else Reserved.set(PPC::R30); } - if (Subtarget.isSVR4ABI() && !TM.isPPC64() && - TM.getRelocationModel() == Reloc::PIC_) + if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent) Reserved.set(PPC::R30); // Reserve Altivec registers when Altivec is unavailable. @@ -907,8 +934,7 @@ unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const { if (TM.isPPC64()) return PPC::X30; - if (Subtarget.isSVR4ABI() && - TM.getRelocationModel() == Reloc::PIC_) + if (Subtarget.isSVR4ABI() && TM.isPositionIndependent()) return PPC::R29; return PPC::R30; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index b15fde83c9f3..459502eeb2e9 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -75,6 +75,7 @@ public: /// Code Generation virtual methods... const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const override; const uint32_t *getNoPreservedMask() const override; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td index d0954a11cd6a..b4d72eff2b85 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td @@ -70,6 +70,8 @@ def IIC_LdStSTFDU : InstrItinClass; def IIC_LdStSTVEBX : InstrItinClass; def IIC_LdStSTWCX : InstrItinClass; def IIC_LdStSync : InstrItinClass; +def IIC_LdStCOPY : InstrItinClass; +def IIC_LdStPASTE : InstrItinClass; def IIC_SprISYNC : InstrItinClass; def IIC_SprMFSR : InstrItinClass; def IIC_SprMTMSR : InstrItinClass; @@ -104,12 +106,17 @@ def IIC_VecVSR : InstrItinClass; def IIC_SprMTMSRD : InstrItinClass; def IIC_SprSLIE : InstrItinClass; def IIC_SprSLBIE : InstrItinClass; +def IIC_SprSLBIEG : InstrItinClass; def IIC_SprSLBMTE : InstrItinClass; def IIC_SprSLBMFEE : InstrItinClass; def IIC_SprSLBIA : InstrItinClass; +def IIC_SprSLBSYNC : InstrItinClass; def IIC_SprTLBIA : InstrItinClass; def IIC_SprTLBIEL : InstrItinClass; def IIC_SprTLBIE : InstrItinClass; +def IIC_SprABORT : InstrItinClass; +def IIC_SprMSGSYNC : InstrItinClass; +def IIC_SprSTOP : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td index 04a43bc03251..2455e5e52de5 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td @@ -597,11 +597,12 @@ def PPC440Itineraries : ProcessorItineraries< def PPC440Model : SchedMachineModel { let IssueWidth = 2; // 2 instructions are dispatched per cycle. 
- let MinLatency = -1; // OperandCycles are interpreted as MinLatency. let LoadLatency = 5; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. + let CompleteModel = 0; + let Itineraries = PPC440Itineraries; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td index 21a357a2efcf..54cfae5d74b7 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td @@ -1,10 +1,10 @@ //===- PPCScheduleA2.td - PPC A2 Scheduling Definitions --*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // Primary reference: @@ -160,12 +160,13 @@ def PPCA2Itineraries : ProcessorItineraries< def PPCA2Model : SchedMachineModel { let IssueWidth = 1; // 1 instruction is dispatched per cycle. - let MinLatency = -1; // OperandCycles are interpreted as MinLatency. let LoadLatency = 6; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. let MispredictPenalty = 13; + let CompleteModel = 0; + let Itineraries = PPCA2Itineraries; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td index 36b8517dabf1..f687d326b52d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// // -// This file defines the itinerary class data for the Freescale e500mc 32-bit +// This file defines the itinerary class data for the Freescale e500mc 32-bit // Power processor. -// +// // All information is derived from the "e500mc Core Reference Manual", // Freescale Document Number E500MCRM, Rev. 1, 03/2012. // @@ -25,12 +25,12 @@ def E500_DIS1 : FuncUnit; // Dispatch stage - insn 2 // * Execute // 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX. // Some instructions can only execute in SFX0 but not SFX1. -// The CFX has a bypass path, allowing non-divide instructions to execute +// The CFX has a bypass path, allowing non-divide instructions to execute // while a divide instruction is executed. 
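(The same two-line model change recurs throughout this patch: MinLatency is no longer a SchedMachineModel field in this version of LLVM, and CompleteModel = 0 opts an itinerary-based model out of the new requirement that a scheduling model describe every instruction. A minimal sketch of the resulting shape; the name and values below are illustrative only, not taken from any real core:

  def ExamplePPCModel : SchedMachineModel {
    let IssueWidth = 2;     // instructions dispatched per cycle
    let LoadLatency = 5;    // optimistic latency, assuming bypass
    let CompleteModel = 0;  // itineraries do not cover every opcode
    let Itineraries = NoItineraries;
  })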
def E500_SFX0 : FuncUnit; // Simple unit 0 def E500_SFX1 : FuncUnit; // Simple unit 1 def E500_BU : FuncUnit; // Branch unit -def E500_CFX_DivBypass +def E500_CFX_DivBypass : FuncUnit; // CFX divide bypass path def E500_CFX_0 : FuncUnit; // CFX pipeline def E500_LSU_0 : FuncUnit; // LSU pipeline @@ -271,12 +271,12 @@ def PPCE500mcItineraries : ProcessorItineraries< [NoBypass, E500_GPR_Bypass]>, InstrItinData<IIC_FPGeneral, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, InstrStage<2, [E500_FPU_0]>], - [11, 1, 1], // Latency = 8, Repeat rate = 2 + [11, 1, 1], // Latency = 8, Repeat rate = 2 [E500_FPR_Bypass, E500_FPR_Bypass, E500_FPR_Bypass]>, InstrItinData<IIC_FPAddSub, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, InstrStage<4, [E500_FPU_0]>], - [13, 1, 1], // Latency = 10, Repeat rate = 4 + [13, 1, 1], // Latency = 10, Repeat rate = 4 [E500_FPR_Bypass, E500_FPR_Bypass, E500_FPR_Bypass]>, InstrItinData<IIC_FPCompare, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, @@ -311,10 +311,11 @@ def PPCE500mcItineraries : ProcessorItineraries< def PPCE500mcModel : SchedMachineModel { let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. - let MinLatency = -1; // OperandCycles are interpreted as MinLatency. let LoadLatency = 5; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. + let CompleteModel = 0; + let Itineraries = PPCE500mcItineraries; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td index 7c2693ef0d4f..5db886cf8f94 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// // -// This file defines the itinerary class data for the Freescale e5500 64-bit +// This file defines the itinerary class data for the Freescale e5500 64-bit // Power processor. -// +// // All information is derived from the "e5500 Core Reference Manual", // Freescale Document Number e5500RM, Rev. 1, 03/2012. // @@ -25,16 +25,16 @@ def E5500_DIS1 : FuncUnit; // * Execute // 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX. -// The CFX has a bypass path, allowing non-divide instructions to execute +// The CFX has a bypass path, allowing non-divide instructions to execute // while a divide instruction is being executed. 
def E5500_SFX0 : FuncUnit; // Simple unit 0 def E5500_SFX1 : FuncUnit; // Simple unit 1 def E5500_BU : FuncUnit; // Branch unit -def E5500_CFX_DivBypass +def E5500_CFX_DivBypass : FuncUnit; // CFX divide bypass path def E5500_CFX_0 : FuncUnit; // CFX pipeline stage 0 -def E5500_CFX_1 : FuncUnit; // CFX pipeline stage 1 +def E5500_CFX_1 : FuncUnit; // CFX pipeline stage 1 def E5500_LSU_0 : FuncUnit; // LSU pipeline def E5500_FPU_0 : FuncUnit; // FPU pipeline @@ -331,12 +331,12 @@ def PPCE5500Itineraries : ProcessorItineraries< [E5500_GPR_Bypass]>, InstrItinData<IIC_FPGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, InstrStage<1, [E5500_FPU_0]>], - [11, 2, 2], // Latency = 7, Repeat rate = 1 + [11, 2, 2], // Latency = 7, Repeat rate = 1 [E5500_FPR_Bypass, E5500_FPR_Bypass, E5500_FPR_Bypass]>, InstrItinData<IIC_FPAddSub, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, InstrStage<1, [E5500_FPU_0]>], - [11, 2, 2], // Latency = 7, Repeat rate = 1 + [11, 2, 2], // Latency = 7, Repeat rate = 1 [E5500_FPR_Bypass, E5500_FPR_Bypass, E5500_FPR_Bypass]>, InstrItinData<IIC_FPCompare, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, @@ -351,7 +351,7 @@ def PPCE5500Itineraries : ProcessorItineraries< E5500_FPR_Bypass, E5500_FPR_Bypass]>, InstrItinData<IIC_FPDivS, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, InstrStage<16, [E5500_FPU_0]>], - [24, 2, 2], // Latency = 20, Repeat rate = 16 + [24, 2, 2], // Latency = 20, Repeat rate = 16 [E5500_FPR_Bypass, E5500_FPR_Bypass, E5500_FPR_Bypass]>, InstrItinData<IIC_FPFused, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, @@ -371,10 +371,11 @@ def PPCE5500Itineraries : ProcessorItineraries< def PPCE5500Model : SchedMachineModel { let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. - let MinLatency = -1; // OperandCycles are interpreted as MinLatency. let LoadLatency = 6; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. 
+ let CompleteModel = 0; + let Itineraries = PPCE5500Itineraries; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td index a3b73ab4454f..b5a9f96d45ae 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td @@ -40,7 +40,7 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<IIC_IntMulLI , [InstrStage<4, [G5_IU1, G5_IU2]>]>, InstrItinData<IIC_IntRFID , [InstrStage<1, [G5_IU2]>]>, InstrItinData<IIC_IntRotateD , [InstrStage<2, [G5_IU1, G5_IU2]>]>, - InstrItinData<IIC_IntRotateDI , [InstrStage<2, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntRotateDI , [InstrStage<2, [G5_IU1, G5_IU2]>]>, InstrItinData<IIC_IntRotate , [InstrStage<4, [G5_IU1, G5_IU2]>]>, InstrItinData<IIC_IntShift , [InstrStage<2, [G5_IU1, G5_IU2]>]>, InstrItinData<IIC_IntTrapD , [InstrStage<1, [G5_IU1, G5_IU2]>]>, @@ -51,14 +51,14 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<IIC_BrMCRX , [InstrStage<3, [G5_BPU]>]>, InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G5_SLU]>]>, InstrItinData<IIC_LdStLoad , [InstrStage<3, [G5_SLU]>]>, - InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G5_SLU]>]>, - InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G5_SLU]>]>, InstrItinData<IIC_LdStStore , [InstrStage<3, [G5_SLU]>]>, - InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G5_SLU]>]>, InstrItinData<IIC_LdStDSS , [InstrStage<10, [G5_SLU]>]>, InstrItinData<IIC_LdStICBI , [InstrStage<40, [G5_SLU]>]>, InstrItinData<IIC_LdStSTFD , [InstrStage<4, [G5_SLU]>]>, - InstrItinData<IIC_LdStSTFDU , [InstrStage<4, [G5_SLU]>]>, + InstrItinData<IIC_LdStSTFDU , [InstrStage<4, [G5_SLU]>]>, InstrItinData<IIC_LdStLD , [InstrStage<3, [G5_SLU]>]>, InstrItinData<IIC_LdStLDU , [InstrStage<3, [G5_SLU]>]>, InstrItinData<IIC_LdStLDUX , [InstrStage<3, [G5_SLU]>]>, @@ -67,8 +67,8 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<IIC_LdStLFDU , [InstrStage<5, [G5_SLU]>]>, InstrItinData<IIC_LdStLFDUX , [InstrStage<5, [G5_SLU]>]>, InstrItinData<IIC_LdStLHA , [InstrStage<5, [G5_SLU]>]>, - InstrItinData<IIC_LdStLHAU , [InstrStage<5, [G5_SLU]>]>, - InstrItinData<IIC_LdStLHAUX , [InstrStage<5, [G5_SLU]>]>, + InstrItinData<IIC_LdStLHAU , [InstrStage<5, [G5_SLU]>]>, + InstrItinData<IIC_LdStLHAUX , [InstrStage<5, [G5_SLU]>]>, InstrItinData<IIC_LdStLMW , [InstrStage<64, [G5_SLU]>]>, InstrItinData<IIC_LdStLVecX , [InstrStage<3, [G5_SLU]>]>, InstrItinData<IIC_LdStLWA , [InstrStage<5, [G5_SLU]>]>, @@ -118,12 +118,13 @@ def G5Itineraries : ProcessorItineraries< def G5Model : SchedMachineModel { let IssueWidth = 4; // 4 (non-branch) instructions are dispatched per cycle. - let MinLatency = 0; // Out-of-order dispatch. let LoadLatency = 3; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. let MispredictPenalty = 16; + let CompleteModel = 0; + let Itineraries = G5Itineraries; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td index 267f56726180..a8678f56900e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td @@ -382,7 +382,6 @@ def P7Model : SchedMachineModel { // branches), but the total internal issue bandwidth per // cycle (from all queues) is 8. 
- let MinLatency = 0; // Out-of-order dispatch. let LoadLatency = 3; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. @@ -391,6 +390,8 @@ def P7Model : SchedMachineModel { // Try to make sure we have at least 10 dispatch groups in a loop. let LoopMicroOpBufferSize = 40; + let CompleteModel = 0; + let Itineraries = P7Itineraries; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td index 69e6d05c6604..8e52da583a0d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td @@ -391,7 +391,6 @@ def P8Model : SchedMachineModel { // up to six non-branch instructions. // up to two branches in a dispatch group. - let MinLatency = 0; // Out-of-order dispatch. let LoadLatency = 3; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. @@ -400,6 +399,8 @@ def P8Model : SchedMachineModel { // Try to make sure we have at least 10 dispatch groups in a loop. let LoopMicroOpBufferSize = 60; + let CompleteModel = 0; + let Itineraries = P8Itineraries; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index c0fcb6cbb9dc..603f0fccc7c6 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -70,6 +70,8 @@ void PPCSubtarget::initializeEnvironment() { HasP8Vector = false; HasP8Altivec = false; HasP8Crypto = false; + HasP9Vector = false; + HasP9Altivec = false; HasFCPSGN = false; HasFSQRT = false; HasFRE = false; @@ -82,7 +84,6 @@ void PPCSubtarget::initializeEnvironment() { HasFPRND = false; HasFPCVT = false; HasISEL = false; - HasPOPCNTD = false; HasBPERMD = false; HasExtDiv = false; HasCMPB = false; @@ -103,12 +104,15 @@ void PPCSubtarget::initializeEnvironment() { HasHTM = false; HasFusion = false; HasFloat128 = false; + IsISA3_0 = false; + + HasPOPCNTD = POPCNTD_Unavailable; } void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // Determine default and user specified characteristics std::string CPUName = CPU; - if (CPUName.empty()) { + if (CPUName.empty() || CPU == "generic") { // If cross-compiling with -march=ppc64le without -mcpu if (TargetTriple.getArch() == Triple::ppc64le) CPUName = "ppc64le"; @@ -142,18 +146,20 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le); } -/// hasLazyResolverStub - Return true if accesses to the specified global have -/// to go through a dyld lazy resolution stub. This means that an extra load -/// is required to get the address of the global. +/// Return true if accesses to the specified global have to go through a dyld +/// lazy resolution stub. This means that an extra load is required to get the +/// address of the global. bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const { - // We never have stubs if HasLazyResolverStubs=false or if in static mode. 
- if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static) - return false; - bool isDecl = GV->isDeclaration(); - if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage()) + if (!HasLazyResolverStubs) return false; - return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || - GV->hasCommonLinkage() || isDecl; + if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) + return true; + // 32-bit Mach-O has no relocation for a-b if a is undefined, even if b is in + // the section that is being relocated. This means we have to use a load even + // for GVs that are known to be local to the DSO. + if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) + return true; + return false; } // Embedded cores need aggressive scheduling (and some others also benefit). @@ -166,6 +172,8 @@ static bool needsAggressiveScheduling(unsigned Directive) { case PPC::DIR_E5500: case PPC::DIR_PWR7: case PPC::DIR_PWR8: + // FIXME: Same as P8 until POWER9 scheduling info is available + case PPC::DIR_PWR9: return true; } } @@ -191,8 +199,6 @@ void PPCSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const { } void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, - MachineInstr *begin, - MachineInstr *end, unsigned NumRegionInstrs) const { if (needsAggressiveScheduling(DarwinDirective)) { Policy.OnlyTopDown = false; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h index 4f5c95c1483f..9fe286a3b7a9 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -18,9 +18,9 @@ #include "PPCISelLowering.h" #include "PPCInstrInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" -#include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -56,6 +56,7 @@ namespace PPC { DIR_PWR6X, DIR_PWR7, DIR_PWR8, + DIR_PWR9, DIR_64 }; } @@ -64,6 +65,13 @@ class GlobalValue; class TargetMachine; class PPCSubtarget : public PPCGenSubtargetInfo { +public: + enum POPCNTDKind { + POPCNTD_Unavailable, + POPCNTD_Slow, + POPCNTD_Fast + }; + protected: /// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple; @@ -92,6 +100,8 @@ protected: bool HasP8Vector; bool HasP8Altivec; bool HasP8Crypto; + bool HasP9Vector; + bool HasP9Altivec; bool HasFCPSGN; bool HasFSQRT; bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES; @@ -101,7 +111,6 @@ protected: bool HasFPRND; bool HasFPCVT; bool HasISEL; - bool HasPOPCNTD; bool HasBPERMD; bool HasExtDiv; bool HasCMPB; @@ -122,6 +131,9 @@ protected: bool HasHTM; bool HasFusion; bool HasFloat128; + bool IsISA3_0; + + POPCNTDKind HasPOPCNTD; /// When targeting QPX running a stock PPC64 Linux kernel where the stack /// alignment has not been changed, we need to keep the 16-byte alignment @@ -132,7 +144,7 @@ protected: PPCFrameLowering FrameLowering; PPCInstrInfo InstrInfo; PPCTargetLowering TLInfo; - TargetSelectionDAGInfo TSInfo; + SelectionDAGTargetInfo TSInfo; public: /// This constructor initializes the data members to match that @@ -167,7 +179,7 @@ public: const PPCTargetLowering *getTargetLowering() const override { return &TLInfo; } - const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { + const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { return &TSInfo; } const PPCRegisterInfo *getRegisterInfo() const override { @@ -230,9 +242,10 @@ public: bool hasP8Vector() const { return HasP8Vector; } bool hasP8Altivec() const { return HasP8Altivec; } bool hasP8Crypto() const { return HasP8Crypto; } + bool hasP9Vector() const { return HasP9Vector; } + bool hasP9Altivec() const { return HasP9Altivec; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } - bool hasPOPCNTD() const { return HasPOPCNTD; } bool hasBPERMD() const { return HasBPERMD; } bool hasExtDiv() const { return HasExtDiv; } bool hasCMPB() const { return HasCMPB; } @@ -261,6 +274,9 @@ public: bool hasHTM() const { return HasHTM; } bool hasFusion() const { return HasFusion; } bool hasFloat128() const { return HasFloat128; } + bool isISA3_0() const { return IsISA3_0; } + + POPCNTDKind hasPOPCNTD() const { return HasPOPCNTD; } const Triple &getTargetTriple() const { return TargetTriple; } @@ -271,6 +287,7 @@ public: bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } + bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isDarwinABI() const { return isTargetMachO() || isDarwin(); } bool isSVR4ABI() const { return !isDarwinABI(); } @@ -286,8 +303,6 @@ public: void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override; void overrideSchedPolicy(MachineSchedPolicy &Policy, - MachineInstr *begin, - MachineInstr *end, unsigned NumRegionInstrs) const override; bool useAA() const override; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index a9d2e888f4b7..61ce48ecd04f 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -73,10 +73,7 @@ protected: DebugLoc DL = MI->getDebugLoc(); unsigned GPR3 = Is64Bit ? 
PPC::X3 : PPC::R3; unsigned Opc1, Opc2; - SmallVector<unsigned, 4> OrigRegs; - OrigRegs.push_back(OutReg); - OrigRegs.push_back(InReg); - OrigRegs.push_back(GPR3); + const unsigned OrigRegs[] = {OutReg, InReg, GPR3}; switch (MI->getOpcode()) { default: diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp index bf165c9edc6e..7c53a5601790 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp @@ -61,10 +61,10 @@ // //===----------------------------------------------------------------------===// -#include "PPCInstrInfo.h" -#include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" @@ -74,7 +74,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index d24b590317f5..a4ff86cb1e21 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -15,7 +15,9 @@ #include "PPC.h" #include "PPCTargetObjectFile.h" #include "PPCTargetTransformInfo.h" +#include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/MC/MCStreamer.h" @@ -43,6 +45,10 @@ opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden, cl::desc("Disable VSX Swap Removal for PPC")); static cl:: +opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden, + cl::desc("Disable QPX load splat simplification")); + +static cl:: opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden, cl::desc("Disable machine peepholes for PPC")); @@ -172,6 +178,16 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, return PPCTargetMachine::PPC_ABI_UNKNOWN; } +static Reloc::Model getEffectiveRelocModel(const Triple &TT, + Optional<Reloc::Model> RM) { + if (!RM.hasValue()) { + if (TT.isOSDarwin()) + return Reloc::DynamicNoPIC; + return Reloc::Static; + } + return *RM; +} + // The FeatureString here is a little subtle. 
We are modifying the feature // string with what are (currently) non-function specific overrides as it goes // into the LLVMTargetMachine constructor and then using the stored value in the @@ -179,10 +195,11 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) + Optional<Reloc::Model> RM, + CodeModel::Model CM, CodeGenOpt::Level OL) : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU, - computeFSAdditions(FS, OL, TT), Options, RM, CM, OL), + computeFSAdditions(FS, OL, TT), Options, + getEffectiveRelocModel(TT, RM), CM, OL), TLOF(createTLOF(getTargetTriple())), TargetABI(computeTargetABI(TT, Options)), Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) { @@ -214,7 +231,8 @@ void PPC32TargetMachine::anchor() { } PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, + CodeModel::Model CM, CodeGenOpt::Level OL) : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} @@ -223,7 +241,8 @@ void PPC64TargetMachine::anchor() { } PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, + CodeModel::Model CM, CodeGenOpt::Level OL) : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} @@ -245,8 +264,7 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const { // it as a key for the subtarget since that can be the only difference // between two functions. bool SoftFloat = - F.hasFnAttribute("use-soft-float") && - F.getFnAttribute("use-soft-float").getValueAsString() == "true"; // If the soft float attribute is set on the function turn on the soft float // subtarget feature. if (SoftFloat) @@ -313,9 +331,9 @@ void PPCPassConfig::addIRPasses() { if (EnablePrefetch.getNumOccurrences() > 0) UsePrefetching = EnablePrefetch; if (UsePrefetching) - addPass(createPPCLoopDataPrefetchPass()); + addPass(createLoopDataPrefetchPass()); - if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) { + if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) { // Call SeparateConstOffsetFromGEP pass to extract constants within indices // and lower a GEP with multiple indices to either arithmetic operations or // multiple GEPs with single index. @@ -379,18 +397,35 @@ void PPCPassConfig::addMachineSSAOptimization() { } void PPCPassConfig::addPreRegAlloc() { - initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); - insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID, - &PPCVSXFMAMutateID); - if (getPPCTargetMachine().getRelocationModel() == Reloc::PIC_) + if (getOptLevel() != CodeGenOpt::None) { + initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); + insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID, + &PPCVSXFMAMutateID); + } + + // FIXME: We probably don't need to run these for -fPIE. + if (getPPCTargetMachine().isPositionIndependent()) { + // FIXME: LiveVariables should not be necessary here! + // The PPCTLSDynamicCall pass uses LiveIntervals, which previously depended on + // LiveVariables.
This (unnecessary) dependency has been removed now, + // however a stage-2 clang build fails without LiveVariables computed here. + addPass(&LiveVariablesID, false); addPass(createPPCTLSDynamicCallPass()); + } if (EnableExtraTOCRegDeps) addPass(createPPCTOCRegDepsPass()); } void PPCPassConfig::addPreSched2() { - if (getOptLevel() != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) { addPass(&IfConverterID); + + // This optimization must happen after anything that might do store-to-load + // forwarding. Here we're after RA (and, thus, when spills are inserted) + // but before post-RA scheduling. + if (!DisableQPXLoadSplat) + addPass(createPPCQPXLoadSplatPass()); + } } void PPCPassConfig::addPreEmitPass() { diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h index 6496339519a1..59b4f1e30c0e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -21,7 +21,7 @@ namespace llvm { -/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets. +/// Common code between 32-bit and 64-bit PowerPC targets. /// class PPCTargetMachine : public LLVMTargetMachine { public: @@ -35,8 +35,9 @@ private: public: PPCTargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, Reloc::Model RM, - CodeModel::Model CM, CodeGenOpt::Level OL); + StringRef FS, const TargetOptions &Options, + Optional<Reloc::Model> RM, CodeModel::Model CM, + CodeGenOpt::Level OL); ~PPCTargetMachine() override; @@ -57,25 +58,25 @@ public: }; }; -/// PPC32TargetMachine - PowerPC 32-bit target machine. +/// PowerPC 32-bit target machine. /// class PPC32TargetMachine : public PPCTargetMachine { virtual void anchor(); public: PPC32TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; -/// PPC64TargetMachine - PowerPC 64-bit target machine. +/// PowerPC 64-bit target machine. 
/// class PPC64TargetMachine : public PPCTargetMachine { virtual void anchor(); public: PPC64TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp index 798bb9d6b892..8f660355c0ac 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp @@ -53,7 +53,7 @@ MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal( const MCExpr *PPC64LinuxTargetObjectFile:: getDebugThreadLocalSymbol(const MCSymbol *Sym) const { const MCExpr *Expr = - MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_PPC_DTPREL, getContext()); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_DTPREL, getContext()); return MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(0x8000, getContext()), getContext()); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index cd86dabd5abe..9331e41fb9c1 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -21,6 +21,12 @@ using namespace llvm; static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting", cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden); +// This is currently only used for the data prefetch pass which is only enabled +// for BG/Q by default. +static cl::opt<unsigned> +CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64), + cl::desc("The loop prefetch cache line size")); + //===----------------------------------------------------------------------===// // // PPC cost model. @@ -30,8 +36,9 @@ cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden); TargetTransformInfo::PopcntSupportKind PPCTTIImpl::getPopcntSupport(unsigned TyWidth) { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); - if (ST->hasPOPCNTD() && TyWidth <= 64) - return TTI::PSK_FastHardware; + if (ST->hasPOPCNTD() != PPCSubtarget::POPCNTD_Unavailable && TyWidth <= 64) + return ST->hasPOPCNTD() == PPCSubtarget::POPCNTD_Slow ? + TTI::PSK_SlowHardware : TTI::PSK_FastHardware; return TTI::PSK_Software; } @@ -230,6 +237,18 @@ unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) { } +unsigned PPCTTIImpl::getCacheLineSize() { + // This is currently only used for the data prefetch pass which is only + // enabled for BG/Q by default. + return CacheLineSize; +} + +unsigned PPCTTIImpl::getPrefetchDistance() { + // This seems like a reasonable default for the BG/Q (this pass is enabled, by + // default, only on the BG/Q). + return 300; +} + unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) { unsigned Directive = ST->getDarwinDirective(); // The 440 has no SIMD support, but floating-point instructions @@ -248,8 +267,9 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) { // For P7 and P8, floating-point instructions have a 6-cycle latency and // there are two execution units, so unroll by 12x for latency hiding. 
- if (Directive == PPC::DIR_PWR7 || - Directive == PPC::DIR_PWR8) + // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready + if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || + Directive == PPC::DIR_PWR9) return 12; // For most things, modern systems have two execution units (and @@ -355,7 +375,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, // If we can use the permutation-based load sequence, then this is also // relatively cheap (not counting loop-invariant instructions): one load plus // one permute (the last load in a series has extra cost, but we're - // neglecting that here). Note that on the P7, we should do unaligned loads + // neglecting that here). Note that on the P7, we could do unaligned loads // for Altivec types using the VSX instructions, but that's more expensive // than using the permutation-based load sequence. On the P8, that's no // longer true. diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 04c1b02235f0..5ea9a543cdb1 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -70,6 +70,8 @@ public: bool enableInterleavedAccessVectorization(); unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector); + unsigned getCacheLineSize(); + unsigned getPrefetchDistance(); unsigned getMaxInterleaveFactor(unsigned VF); int getArithmeticInstrCost( unsigned Opcode, Type *Ty, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp index 782583ce3423..60f1ad5585ff 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp @@ -13,11 +13,11 @@ // //===----------------------------------------------------------------------===// -#include "PPCInstrInfo.h" -#include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPCHazardRecognizers.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" @@ -28,7 +28,6 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index 6b19a2f7118b..7c22cb22bfa5 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -38,8 +38,14 @@ using namespace llvm; -static cl::opt<bool> DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation", -cl::desc("Disable VSX FMA instruction mutation"), cl::Hidden); +// Temporarily disable FMA mutation by default, since it doesn't handle +// cross-basic-block intervals well. 
+// See: http://lists.llvm.org/pipermail/llvm-dev/2016-February/095669.html +// http://reviews.llvm.org/D17087 +static cl::opt<bool> DisableVSXFMAMutate( + "disable-ppc-vsx-fma-mutation", + cl::desc("Disable VSX FMA instruction mutation"), cl::init(true), + cl::Hidden); #define DEBUG_TYPE "ppc-vsx-fma-mutate" @@ -99,7 +105,7 @@ protected: // %RM<imp-use>; VSLRC:%vreg16,%vreg18,%vreg9 // and we remove: %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9 - SlotIndex FMAIdx = LIS->getInstructionIndex(MI); + SlotIndex FMAIdx = LIS->getInstructionIndex(*MI); VNInfo *AddendValNo = LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn(); @@ -168,21 +174,32 @@ protected: if (OtherUsers || KillsAddendSrc) continue; - // Find one of the product operands that is killed by this instruction. + // The transformation doesn't work well with things like: + // %vreg5 = A-form-op %vreg5, %vreg11, %vreg5; + // unless vreg11 is also a kill, so skip when it is not, + // and check operand 3 to see it is also a kill to handle the case: + // %vreg5 = A-form-op %vreg5, %vreg5, %vreg11; + // where vreg5 and vreg11 are both kills. This case would be skipped + // otherwise. + unsigned OldFMAReg = MI->getOperand(0).getReg(); + + // Find one of the product operands that is killed by this instruction. unsigned KilledProdOp = 0, OtherProdOp = 0; - if (LIS->getInterval(MI->getOperand(2).getReg()) - .Query(FMAIdx).isKill()) { + unsigned Reg2 = MI->getOperand(2).getReg(); + unsigned Reg3 = MI->getOperand(3).getReg(); + if (LIS->getInterval(Reg2).Query(FMAIdx).isKill() + && Reg2 != OldFMAReg) { KilledProdOp = 2; OtherProdOp = 3; - } else if (LIS->getInterval(MI->getOperand(3).getReg()) - .Query(FMAIdx).isKill()) { + } else if (LIS->getInterval(Reg3).Query(FMAIdx).isKill() + && Reg3 != OldFMAReg) { KilledProdOp = 3; OtherProdOp = 2; } - // If there are no killed product operands, then this transformation is - // likely not profitable. + // If there are no usable killed product operands, then this + // transformation is likely not profitable. if (!KilledProdOp) continue; @@ -212,14 +229,6 @@ protected: bool KilledProdRegUndef = MI->getOperand(KilledProdOp).isUndef(); bool OtherProdRegUndef = MI->getOperand(OtherProdOp).isUndef(); - unsigned OldFMAReg = MI->getOperand(0).getReg(); - - // The transformation doesn't work well with things like: - // %vreg5 = A-form-op %vreg5, %vreg11, %vreg5; - // so leave such things alone. - if (OldFMAReg == KilledProdReg) - continue; - // If there isn't a class that fits, we can't perform the transform. // This is needed for correctness with a mixture of VSX and Altivec // instructions to make sure that a low VSX register is not assigned to @@ -236,23 +245,33 @@ protected: MI->getOperand(0).setReg(KilledProdReg); MI->getOperand(1).setReg(KilledProdReg); MI->getOperand(3).setReg(AddendSrcReg); - MI->getOperand(2).setReg(OtherProdReg); MI->getOperand(0).setSubReg(KilledProdSubReg); MI->getOperand(1).setSubReg(KilledProdSubReg); MI->getOperand(3).setSubReg(AddSubReg); - MI->getOperand(2).setSubReg(OtherProdSubReg); MI->getOperand(1).setIsKill(KilledProdRegKill); MI->getOperand(3).setIsKill(AddRegKill); - MI->getOperand(2).setIsKill(OtherProdRegKill); MI->getOperand(1).setIsUndef(KilledProdRegUndef); MI->getOperand(3).setIsUndef(AddRegUndef); - MI->getOperand(2).setIsUndef(OtherProdRegUndef); MI->setDesc(TII->get(AltOpc)); + // If the addend is also a multiplicand, replace it with the addend + // source in both places. 
+ if (OtherProdReg == AddendMI->getOperand(0).getReg()) { + MI->getOperand(2).setReg(AddendSrcReg); + MI->getOperand(2).setSubReg(AddSubReg); + MI->getOperand(2).setIsKill(AddRegKill); + MI->getOperand(2).setIsUndef(AddRegUndef); + } else { + MI->getOperand(2).setReg(OtherProdReg); + MI->getOperand(2).setSubReg(OtherProdSubReg); + MI->getOperand(2).setIsKill(OtherProdRegKill); + MI->getOperand(2).setIsUndef(OtherProdRegUndef); + } + DEBUG(dbgs() << " -> " << *MI); // The killed product operand was killed here, so we can reuse it now @@ -312,7 +331,7 @@ protected: // Remove the (now unused) copy. DEBUG(dbgs() << " removing: " << *AddendMI << '\n'); - LIS->RemoveMachineInstrFromMaps(AddendMI); + LIS->RemoveMachineInstrFromMaps(*AddendMI); AddendMI->eraseFromParent(); Changed = true; @@ -323,6 +342,9 @@ protected: public: bool runOnMachineFunction(MachineFunction &MF) override { + if (skipFunction(*MF.getFunction())) + return false; + // If we don't have VSX then go ahead and return without doing // anything. const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>(); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 27c540fcf211..d53c8e38254f 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -191,6 +191,9 @@ private: public: // Main entry point for this pass. bool runOnMachineFunction(MachineFunction &MF) override { + if (skipFunction(*MF.getFunction())) + return false; + // If we don't have VSX on the subtarget, don't do anything. const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>(); if (!STI.hasVSX()) @@ -404,9 +407,9 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { case PPC::VSPLTB: case PPC::VSPLTH: case PPC::VSPLTW: + case PPC::XXSPLTW: // Splats are lane-sensitive, but we can use special handling - // to adjust the source lane for the splat. This is not yet - // implemented. When it is, we need to uncomment the following: + // to adjust the source lane for the splat. SwapVector[VecIdx].IsSwappable = 1; SwapVector[VecIdx].SpecialHandling = SHValues::SH_SPLAT; break; @@ -512,7 +515,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { // permute control vectors (for shift values 1, 2, 3). However, // VPERM has a more restrictive register class. 
case PPC::XXSLDWI: - case PPC::XXSPLTW: break; } } @@ -690,6 +692,7 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; unsigned UseReg = MI->getOperand(0).getReg(); MachineInstr *DefMI = MRI->getVRegDef(UseReg); + unsigned DefReg = DefMI->getOperand(0).getReg(); int DefIdx = SwapMap[DefMI]; if (!SwapVector[DefIdx].IsSwap || SwapVector[DefIdx].IsLoad || @@ -705,6 +708,25 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { DEBUG(MI->dump()); DEBUG(dbgs() << "\n"); } + + // Ensure all uses of the register defined by DefMI feed store + // instructions + for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DefReg)) { + int UseIdx = SwapMap[&UseMI]; + + if (SwapVector[UseIdx].VSEMI->getOpcode() != MI->getOpcode()) { + SwapVector[Repr].WebRejected = 1; + + DEBUG(dbgs() << + format("Web %d rejected for swap not feeding only stores\n", + Repr)); + DEBUG(dbgs() << " def " << " : "); + DEBUG(DefMI->dump()); + DEBUG(dbgs() << " use " << UseIdx << ": "); + DEBUG(SwapVector[UseIdx].VSEMI->dump()); + DEBUG(dbgs() << "\n"); + } + } } } @@ -803,12 +825,21 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { llvm_unreachable("Unexpected splat opcode"); case PPC::VSPLTB: NElts = 16; break; case PPC::VSPLTH: NElts = 8; break; - case PPC::VSPLTW: NElts = 4; break; + case PPC::VSPLTW: + case PPC::XXSPLTW: NElts = 4; break; } - unsigned EltNo = MI->getOperand(1).getImm(); + unsigned EltNo; + if (MI->getOpcode() == PPC::XXSPLTW) + EltNo = MI->getOperand(2).getImm(); + else + EltNo = MI->getOperand(1).getImm(); + EltNo = (EltNo + NElts / 2) % NElts; - MI->getOperand(1).setImm(EltNo); + if (MI->getOpcode() == PPC::XXSPLTW) + MI->getOperand(2).setImm(EltNo); + else + MI->getOperand(1).setImm(EltNo); DEBUG(dbgs() << " Into: "); DEBUG(MI->dump()); diff --git a/contrib/llvm/lib/Target/PowerPC/README_P9.txt b/contrib/llvm/lib/Target/PowerPC/README_P9.txt new file mode 100644 index 000000000000..d56f7cca7b21 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/README_P9.txt @@ -0,0 +1,605 @@ +//===- README_P9.txt - Notes for improving Power9 code gen ----------------===// + +TODO: Instructions that need intrinsics or a mapping to LLVM IR + +Altivec: +- Vector Compare Not Equal (Zero): + vcmpneb(.) vcmpneh(.) vcmpnew(.) + vcmpnezb(.) vcmpnezh(.) vcmpnezw(.) + . Same as other VCMP*, use VCMP/VCMPo form (support intrinsic) + +- Vector Extract Unsigned: vextractub vextractuh vextractuw vextractd + . Don't use llvm extractelement because the semantics are different + . Use intrinsics: + (set v2i64:$vD, (int_ppc_altivec_vextractub v16i8:$vA, imm:$UIMM)) + (set v2i64:$vD, (int_ppc_altivec_vextractuh v8i16:$vA, imm:$UIMM)) + (set v2i64:$vD, (int_ppc_altivec_vextractuw v4i32:$vA, imm:$UIMM)) + (set v2i64:$vD, (int_ppc_altivec_vextractd v2i64:$vA, imm:$UIMM))
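(A note on what "use intrinsics" entails on the IR side: each int_ppc_altivec_* name above also needs a declaration in include/llvm/IR/IntrinsicsPowerPC.td. A sketch for the first one follows; the GCC builtin name and the i32 typing of the immediate are assumptions, not something defined in this patch:

  let TargetPrefix = "ppc" in
  def int_ppc_altivec_vextractub : GCCBuiltin<"__builtin_altivec_vextractub">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;

The same shape applies to the other vextract* entries.)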
+ +- Vector Extract Unsigned Byte Left/Right-Indexed: + vextublx vextubrx vextuhlx vextuhrx vextuwlx vextuwrx + . Use intrinsics: + // Left-Indexed + (set i64:$rD, (int_ppc_altivec_vextublx i64:$rA, v16i8:$vB)) + (set i64:$rD, (int_ppc_altivec_vextuhlx i64:$rA, v8i16:$vB)) + (set i64:$rD, (int_ppc_altivec_vextuwlx i64:$rA, v4i32:$vB)) + + // Right-Indexed + (set i64:$rD, (int_ppc_altivec_vextubrx i64:$rA, v16i8:$vB)) + (set i64:$rD, (int_ppc_altivec_vextuhrx i64:$rA, v8i16:$vB)) + (set i64:$rD, (int_ppc_altivec_vextuwrx i64:$rA, v4i32:$vB)) + +- Vector Insert Element Instructions: vinsertb vinsertd vinserth vinsertw + (set v16i8:$vD, (int_ppc_altivec_vinsertb v16i8:$vA, imm:$UIMM)) + (set v8i16:$vD, (int_ppc_altivec_vinsertd v8i16:$vA, imm:$UIMM)) + (set v4i32:$vD, (int_ppc_altivec_vinserth v4i32:$vA, imm:$UIMM)) + (set v2i64:$vD, (int_ppc_altivec_vinsertw v2i64:$vA, imm:$UIMM)) + +- Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD]: + vclzlsbb vctzlsbb + . Use intrinsic: + (set i64:$rD, (int_ppc_altivec_vclzlsbb v16i8:$vB)) + (set i64:$rD, (int_ppc_altivec_vctzlsbb v16i8:$vB)) + +- Vector Count Trailing Zeros: vctzb vctzh vctzw vctzd + . Map to llvm cttz + (set v16i8:$vD, (cttz v16i8:$vB)) // vctzb + (set v8i16:$vD, (cttz v8i16:$vB)) // vctzh + (set v4i32:$vD, (cttz v4i32:$vB)) // vctzw + (set v2i64:$vD, (cttz v2i64:$vB)) // vctzd + +- Vector Extend Sign: vextsb2w vextsh2w vextsb2d vextsh2d vextsw2d + . vextsb2w: + (set v4i32:$vD, (sext v4i8:$vB)) + + // PowerISA_V3.0: + do i = 0 to 3 + VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].byte[3]) + end + + . vextsh2w: + (set v4i32:$vD, (sext v4i16:$vB)) + + // PowerISA_V3.0: + do i = 0 to 3 + VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].hword[1]) + end + + . vextsb2d + (set v2i64:$vD, (sext v2i8:$vB)) + + // PowerISA_V3.0: + do i = 0 to 1 + VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].byte[7]) + end + + . vextsh2d + (set v2i64:$vD, (sext v2i16:$vB)) + + // PowerISA_V3.0: + do i = 0 to 1 + VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].hword[3]) + end + + . vextsw2d + (set v2i64:$vD, (sext v2i32:$vB)) + + // PowerISA_V3.0: + do i = 0 to 1 + VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].word[1]) + end + +- Vector Integer Negate: vnegw vnegd + . Map to llvm ineg + (set v4i32:$rT, (ineg v4i32:$rA)) // vnegw + (set v2i64:$rT, (ineg v2i64:$rA)) // vnegd + +- Vector Parity Byte: vprtybw vprtybd vprtybq + . Use intrinsic: + (set v4i32:$rD, (int_ppc_altivec_vprtybw v4i32:$vB)) + (set v2i64:$rD, (int_ppc_altivec_vprtybd v2i64:$vB)) + (set v1i128:$rD, (int_ppc_altivec_vprtybq v1i128:$vB)) + +- Vector (Bit) Permute (Right-indexed): + . vbpermd: Same as "vbpermq", use VX1_Int_Ty2: + VX1_Int_Ty2<1484, "vbpermd", int_ppc_altivec_vbpermd, v2i64, v2i64>; + + . vpermr: use VA1a_Int_Ty3 + VA1a_Int_Ty3<59, "vpermr", int_ppc_altivec_vpermr, v16i8, v16i8, v16i8>; + +- Vector Rotate Left Mask/Mask-Insert: vrlwnm vrlwmi vrldnm vrldmi + . Use intrinsic: + VX1_Int_Ty<389, "vrlwnm", int_ppc_altivec_vrlwnm, v4i32>; + VX1_Int_Ty<133, "vrlwmi", int_ppc_altivec_vrlwmi, v4i32>; + VX1_Int_Ty<453, "vrldnm", int_ppc_altivec_vrldnm, v2i64>; + VX1_Int_Ty<197, "vrldmi", int_ppc_altivec_vrldmi, v2i64>; + +- Vector Shift Left/Right: vslv vsrv + . Use intrinsic, don't map to llvm shl and lshr, because they have different + semantics, e.g. vslv: + + do i = 0 to 15 + sh ← VR[VRB].byte[i].bit[5:7] + VR[VRT].byte[i] ← src.byte[i:i+1].bit[sh:sh+7] + end + + VR[VRT].byte[i] is composed of 2 bytes from src.byte[i:i+1] + + . VX1_Int_Ty<1860, "vslv", int_ppc_altivec_vslv, v16i8>; + VX1_Int_Ty<1796, "vsrv", int_ppc_altivec_vsrv, v16i8>;
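(For readers unfamiliar with the VX1_Int_Ty shorthand used above: it is an existing helper class in PPCInstrAltivec.td that ties an opcode, a mnemonic, and an intrinsic into one VX-form definition. It expands approximately as follows; this is a paraphrase, not text from the patch:

  class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
    : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
               !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP,
               [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>;

so a single VX1_Int_Ty<...> line yields a complete instruction whose selection pattern invokes the named intrinsic.)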
+ +- Vector Multiply-by-10 (& Write Carry) Unsigned Quadword: + vmul10uq vmul10cuq + . Use intrinsic: + VX1_Int_Ty<513, "vmul10uq", int_ppc_altivec_vmul10uq, v1i128>; + VX1_Int_Ty< 1, "vmul10cuq", int_ppc_altivec_vmul10cuq, v1i128>; + +- Vector Multiply-by-10 Extended (& Write Carry) Unsigned Quadword: + vmul10euq vmul10ecuq + . Use intrinsic: + VX1_Int_Ty<577, "vmul10euq", int_ppc_altivec_vmul10euq, v1i128>; + VX1_Int_Ty< 65, "vmul10ecuq", int_ppc_altivec_vmul10ecuq, v1i128>; + +- Decimal Convert From/to National/Zoned/Signed-QWord: + bcdcfn. bcdcfz. bcdctn. bcdctz. bcdcfsq. bcdctsq. + . Use intrinsics: + (set v1i128:$vD, (int_ppc_altivec_bcdcfno v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcdcfzo v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcdctno v1i128:$vB)) + (set v1i128:$vD, (int_ppc_altivec_bcdctzo v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcdcfsqo v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcdctsqo v1i128:$vB)) + +- Decimal Copy-Sign/Set-Sign: bcdcpsgn. bcdsetsgn. + . Use intrinsics: + (set v1i128:$vD, (int_ppc_altivec_bcdcpsgno v1i128:$vA, v1i128:$vB)) + (set v1i128:$vD, (int_ppc_altivec_bcdsetsgno v1i128:$vB, i1:$PS)) + +- Decimal Shift/Unsigned-Shift/Shift-and-Round: bcds. bcdus. bcdsr. + . Use intrinsics: + (set v1i128:$vD, (int_ppc_altivec_bcdso v1i128:$vA, v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB)) + (set v1i128:$vD, (int_ppc_altivec_bcdsro v1i128:$vA, v1i128:$vB, i1:$PS)) + + . Note: only one byte of VA is accessed, i.e. VA.byte[7] + +- Decimal (Unsigned) Truncate: bcdtrunc. bcdutrunc. + . Use intrinsics: + (set v1i128:$vD, (int_ppc_altivec_bcdso v1i128:$vA, v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB)) + + . Note: only two bytes of VA are accessed, i.e. VA.hword[3] (VA.bit[48:63]) + +VSX: +- QP Copy Sign: xscpsgnqp + . Similar to xscpsgndp + . (set f128:$vT, (fcopysign f128:$vB, f128:$vA)) + +- QP Absolute/Negative-Absolute/Negate: xsabsqp xsnabsqp xsnegqp + . Similar to xsabsdp/xsnabsdp/xsnegdp + . (set f128:$vT, (fabs f128:$vB)) // xsabsqp + (set f128:$vT, (fneg (fabs f128:$vB))) // xsnabsqp + (set f128:$vT, (fneg f128:$vB)) // xsnegqp + +- QP Add/Divide/Multiply/Subtract/Square-Root: + xsaddqp xsdivqp xsmulqp xssubqp xssqrtqp + . Similar to xsadddp + . isCommutable = 1 + (set f128:$vT, (fadd f128:$vA, f128:$vB)) // xsaddqp + (set f128:$vT, (fmul f128:$vA, f128:$vB)) // xsmulqp + + . isCommutable = 0 + (set f128:$vT, (fdiv f128:$vA, f128:$vB)) // xsdivqp + (set f128:$vT, (fsub f128:$vA, f128:$vB)) // xssubqp + (set f128:$vT, (fsqrt f128:$vB)) // xssqrtqp + +- Round to Odd of QP Add/Divide/Multiply/Subtract/Square-Root: + xsaddqpo xsdivqpo xsmulqpo xssubqpo xssqrtqpo + . Similar to xsrsqrtedp?? + def XSRSQRTEDP : XX2Form<60, 74, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrsqrtedp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfrsqrte f64:$XB))]>; + + . Define DAG Node in PPCInstrInfo.td: + def PPCfaddrto: SDNode<"PPCISD::FADDRTO", SDTFPBinOp, []>; + def PPCfdivrto: SDNode<"PPCISD::FDIVRTO", SDTFPBinOp, []>; + def PPCfmulrto: SDNode<"PPCISD::FMULRTO", SDTFPBinOp, []>; + def PPCfsubrto: SDNode<"PPCISD::FSUBRTO", SDTFPBinOp, []>; + def PPCfsqrtrto: SDNode<"PPCISD::FSQRTRTO", SDTFPUnaryOp, []>;
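(For context, the SDTypeProfiles these SDNode definitions rely on already exist in include/llvm/Target/TargetSelectionDAG.td, so no new profile is needed:

  def SDTFPBinOp : SDTypeProfile<1, 2, [     // fadd, fmul, etc.
    SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>
  ]>;
  def SDTFPUnaryOp : SDTypeProfile<1, 1, [   // fneg, fsqrt
    SDTCisSameAs<0, 1>, SDTCisFP<0>
  ]>;

Each new PPCISD::*RTO opcode would also need an enum entry in PPCISelLowering.h.)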
+ + DAG patterns of each instruction (PPCInstrVSX.td): + . isCommutable = 1 + (set f128:$vT, (PPCfaddrto f128:$vA, f128:$vB)) // xsaddqpo + (set f128:$vT, (PPCfmulrto f128:$vA, f128:$vB)) // xsmulqpo + + . isCommutable = 0 + (set f128:$vT, (PPCfdivrto f128:$vA, f128:$vB)) // xsdivqpo + (set f128:$vT, (PPCfsubrto f128:$vA, f128:$vB)) // xssubqpo + (set f128:$vT, (PPCfsqrtrto f128:$vB)) // xssqrtqpo + +- QP (Negative) Multiply-{Add/Subtract}: xsmaddqp xsmsubqp xsnmaddqp xsnmsubqp + . Ref: xsmaddadp/xsmsubadp/xsnmaddadp/xsnmsubadp + + . isCommutable = 1 + // xsmaddqp + [(set f128:$vT, (fma f128:$vA, f128:$vB, f128:$vTi))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsmsubqp + [(set f128:$vT, (fma f128:$vA, f128:$vB, (fneg f128:$vTi)))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsnmaddqp + [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, f128:$vTi)))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsnmsubqp + [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, (fneg f128:$vTi))))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + +- Round to Odd of QP (Negative) Multiply-{Add/Subtract}: + xsmaddqpo xsmsubqpo xsnmaddqpo xsnmsubqpo + . Similar to xsrsqrtedp?? + + . Define DAG Node in PPCInstrInfo.td: + def PPCfmarto: SDNode<"PPCISD::FMARTO", SDTFPTernaryOp, []>; + + It looks like we only need to define "PPCfmarto" for these instructions, + because according to PowerISA_V3.0, these instructions perform RTO on + fma's result: + xsmaddqp(o) + v ← bfp_MULTIPLY_ADD(src1, src3, src2) + rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v) + result ← bfp_CONVERT_TO_BFP128(rnd) + + xsmsubqp(o) + v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2)) + rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v) + result ← bfp_CONVERT_TO_BFP128(rnd) + + xsnmaddqp(o) + v ← bfp_MULTIPLY_ADD(src1,src3,src2) + rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)) + result ← bfp_CONVERT_TO_BFP128(rnd) + + xsnmsubqp(o) + v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2)) + rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)) + result ← bfp_CONVERT_TO_BFP128(rnd) + + DAG patterns of each instruction (PPCInstrVSX.td): + . isCommutable = 1 + // xsmaddqpo + [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, f128:$vTi))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsmsubqpo + [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi)))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsnmaddqpo + [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, f128:$vTi)))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsnmsubqpo + [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi))))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + +- QP Compare Ordered/Unordered: xscmpoqp xscmpuqp + . ref: XSCMPUDP + def XSCMPUDP : XX3Form_1<60, 35, + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), + "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>; + + . No SDAG, intrinsic, builtin are required?? + Or llvm fcmp order/unorder compare?? + +- DP/QP Compare Exponents: xscmpexpdp xscmpexpqp + . No SDAG, intrinsic, builtin are required? + +- DP Compare ==, >=, >, !=: xscmpeqdp xscmpgedp xscmpgtdp xscmpnedp + . I checked the existing instruction "XSCMPUDP"; they differ in the target + register: "XSCMPUDP" writes to a CR field, xscmp*dp writes to a VSX register + + . 
Use intrinsic: + (set i128:$XT, (int_ppc_vsx_xscmpeqdp f64:$XA, f64:$XB)) + (set i128:$XT, (int_ppc_vsx_xscmpgedp f64:$XA, f64:$XB)) + (set i128:$XT, (int_ppc_vsx_xscmpgtdp f64:$XA, f64:$XB)) + (set i128:$XT, (int_ppc_vsx_xscmpnedp f64:$XA, f64:$XB)) + +- Vector Compare Not Equal: xvcmpnedp xvcmpnedp. xvcmpnesp xvcmpnesp. + . Similar to xvcmpeqdp: + defm XVCMPEQDP : XX3Form_Rcr<60, 99, + "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, + int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>; + + . So we should use "XX3Form_Rcr" to implement the intrinsic + +- Convert DP -> QP: xscvdpqp + . Similar to XSCVDPSP: + def XSCVDPSP : XX2Form<60, 265, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpsp $XT, $XB", IIC_VecFP, []>; + . So, no SDAG, intrinsic, builtin are required?? + +- Round & Convert QP -> DP (dword[1] is set to zero): xscvqpdp xscvqpdpo + . Similar to XSCVDPSP + . No SDAG, intrinsic, builtin are required?? + +- Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero): + xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz + . According to PowerISA_V3.0, these are similar to "XSCVDPSXDS", "XSCVDPSXWS", + "XSCVDPUXDS", "XSCVDPUXWS" + + . DAG patterns: + (set f128:$XT, (PPCfctidz f128:$XB)) // xscvqpsdz + (set f128:$XT, (PPCfctiwz f128:$XB)) // xscvqpswz + (set f128:$XT, (PPCfctiduz f128:$XB)) // xscvqpudz + (set f128:$XT, (PPCfctiwuz f128:$XB)) // xscvqpuwz + +- Convert (Un)Signed DWord -> QP: xscvsdqp xscvudqp + . Similar to XSCVSXDSP + . (set f128:$XT, (PPCfcfids f64:$XB)) // xscvsdqp + (set f128:$XT, (PPCfcfidus f64:$XB)) // xscvudqp + +- (Round &) Convert DP <-> HP: xscvdphp xscvhpdp + . Similar to XSCVDPSP + . No SDAG, intrinsic, builtin are required?? + +- Vector HP -> SP: xvcvhpsp xvcvsphp + . Similar to XVCVDPSP: + def XVCVDPSP : XX2Form<60, 393, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpsp $XT, $XB", IIC_VecFP, []>; + . No SDAG, intrinsic, builtin are required?? + +- Round to Quad-Precision Integer: xsrqpi xsrqpix + . These are a combination of "XSRDPI", "XSRDPIC", "XSRDPIM", ..., because you + need to assign the rounding mode in the instruction + . Provide builtin? + (set f128:$vT, (int_ppc_vsx_xsrqpi f128:$vB)) + (set f128:$vT, (int_ppc_vsx_xsrqpix f128:$vB)) + +- Round Quad-Precision to Double-Extended Precision (fp80): xsrqpxp + . Provide builtin? + (set f128:$vT, (int_ppc_vsx_xsrqpxp f128:$vB)) + +Fixed Point Facility: + +- Exploit cmprb and cmpeqb (perhaps for something like + isalpha/isdigit/isupper/islower and isspace respectively). This can + perhaps be done through a builtin. + +- Provide testing for cnttz[dw] +- Insert Exponent DP/QP: xsiexpdp xsiexpqp + . Use intrinsic? + . xsiexpdp: + // Note: rA and rB are the unsigned integer value. + (set f128:$XT, (int_ppc_vsx_xsiexpdp i64:$rA, i64:$rB)) + + . xsiexpqp: + (set f128:$vT, (int_ppc_vsx_xsiexpqp f128:$vA, f64:$vB)) + +- Extract Exponent/Significand DP/QP: xsxexpdp xsxsigdp xsxexpqp xsxsigqp + . Use intrinsic? + . (set i64:$rT, (int_ppc_vsx_xsxexpdp f64:$XB)) // xsxexpdp + (set i64:$rT, (int_ppc_vsx_xsxsigdp f64:$XB)) // xsxsigdp + (set f128:$vT, (int_ppc_vsx_xsxexpqp f128:$vB)) // xsxexpqp + (set f128:$vT, (int_ppc_vsx_xsxsigqp f128:$vB)) // xsxsigqp + +- Vector Insert Word: xxinsertw + - Useful for inserting f32/i32 elements into vectors (the element to be + inserted needs to be prepared) + . Note: llvm has insertelement in "Vector Operations" + ; yields <n x <ty>> + <result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx> + + But how to map to it?? + [(set v1f128:$XT, (insertelement v1f128:$XTi, f128:$XB, i4:$UIMM))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + + . Or use intrinsic? + (set v1f128:$XT, (int_ppc_vsx_xxinsertw v1f128:$XTi, f128:$XB, i4:$UIMM))
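(One plausible answer to "how to map to it": by the time patterns are matched, IR insertelement has become ISD::INSERT_VECTOR_ELT, which TableGen exposes as the vector_insert node. A hypothetical pattern, with XXINSERTW's operand order and types assumed rather than taken from any real definition:

  def : Pat<(v4i32 (vector_insert v4i32:$XTi, i32:$XB, imm:$UIMM)),
            (XXINSERTW $XTi, $XB, imm:$UIMM)>;

The element to be inserted would still need the preparation noted above.)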
+
+- Vector Insert Word: xxinsertw
+  - Useful for inserting f32/i32 elements into vectors (the element to be
+    inserted needs to be prepared)
+  . Note: llvm has insertelement in "Vector Operations"
+    ; yields <n x <ty>>
+    <result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx>
+
+    But how do we map to it?? (see the source-level sketch after the next
+    item)
+    [(set v1f128:$XT, (insertelement v1f128:$XTi, f128:$XB, i4:$UIMM))]>,
+    RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+
+  . Or use an intrinsic?
+    (set v1f128:$XT, (int_ppc_vsx_xxinsertw v1f128:$XTi, f128:$XB, i4:$UIMM))
+
+- Vector Extract Unsigned Word: xxextractuw
+  - Not useful for extraction of f32 from v4f32 (the current pattern is
+    better - shift->convert)
+  - It is useful for (uint_to_fp (vector_extract v4i32, N))
+  - Unfortunately, it can't be used for (sint_to_fp (vector_extract v4i32, N))
+  . Note: llvm has extractelement in "Vector Operations"
+    ; yields <ty>
+    <result> = extractelement <n x <ty>> <val>, <ty2> <idx>
+
+    How do we map to it??
+    [(set f128:$XT, (extractelement v1f128:$XB, i4:$UIMM))]
+
+  . Or use an intrinsic?
+    (set f128:$XT, (int_ppc_vsx_xxextractuw v1f128:$XB, i4:$UIMM))
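+
+  . One way these reach the SDAG from C, sketched with GCC/Clang vector
+    extensions (the type and function names below are illustrative only);
+    subscripted vector accesses become insertelement/extractelement nodes,
+    which is what the patterns above would have to match:
+
+      typedef int v4si __attribute__((vector_size(16)));
+
+      v4si set_lane2(v4si v, int x) {
+        v[2] = x;    /* -> insertelement: a candidate for xxinsertw */
+        return v;
+      }
+
+      int get_lane2(v4si v) {
+        return v[2]; /* -> extractelement: a candidate for xxextractuw */
+      }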
+
+- Vector Insert Exponent DP/SP: xviexpdp xviexpsp
+  . Use an intrinsic
+    (set v2f64:$XT, (int_ppc_vsx_xviexpdp v2f64:$XA, v2f64:$XB))
+    (set v4f32:$XT, (int_ppc_vsx_xviexpsp v4f32:$XA, v4f32:$XB))
+
+- Vector Extract Exponent/Significand DP/SP: xvxexpdp xvxexpsp xvxsigdp xvxsigsp
+  . Use an intrinsic
+    (set v2f64:$XT, (int_ppc_vsx_xvxexpdp v2f64:$XB))
+    (set v4f32:$XT, (int_ppc_vsx_xvxexpsp v4f32:$XB))
+    (set v2f64:$XT, (int_ppc_vsx_xvxsigdp v2f64:$XB))
+    (set v4f32:$XT, (int_ppc_vsx_xvxsigsp v4f32:$XB))
+
+- Test Data Class SP/DP/QP: xststdcsp xststdcdp xststdcqp
+  . No SDAG node, intrinsic, or builtin is required?
+    Because it seems that we have no way to map the BF field?
+
+    Instruction Form: [PO T XO B XO BX TX]
+    Asm: xststd* BF,XB,DCMX
+
+    BF is an index into a CR register field.
+
+- Vector Test Data Class SP/DP: xvtstdcsp xvtstdcdp
+  . Use an intrinsic
+    (set v4f32:$XT, (int_ppc_vsx_xvtstdcsp v4f32:$XB, i7:$DCMX))
+    (set v2f64:$XT, (int_ppc_vsx_xvtstdcdp v2f64:$XB, i7:$DCMX))
+
+- Maximum/Minimum Type-C/Type-J DP: xsmaxcdp xsmaxjdp xsmincdp xsminjdp
+  . PowerISA_V3.0:
+    "xsmaxcdp can be used to implement the C/C++/Java conditional operation
+    (x>y)?x:y for single-precision and double-precision arguments."
+
+    Note! The C type and J type behave differently when:
+    1. Either input is a NaN
+    2. Both inputs are +-Infinity or +-Zero
+
+  . The C type maps to llvm fmaxnum/fminnum;
+    the J type uses an intrinsic
+
+  . xsmaxcdp xsmincdp
+    (set f64:$XT, (fmaxnum f64:$XA, f64:$XB))
+    (set f64:$XT, (fminnum f64:$XA, f64:$XB))
+
+  . xsmaxjdp xsminjdp
+    (set f64:$XT, (int_ppc_vsx_xsmaxjdp f64:$XA, f64:$XB))
+    (set f64:$XT, (int_ppc_vsx_xsminjdp f64:$XA, f64:$XB))
+
+- Vector Byte-Reverse H/W/D/Q Word: xxbrh xxbrw xxbrd xxbrq
+  . Use an intrinsic
+    (set v8i16:$XT,  (int_ppc_vsx_xxbrh v8i16:$XB))
+    (set v4i32:$XT,  (int_ppc_vsx_xxbrw v4i32:$XB))
+    (set v2i64:$XT,  (int_ppc_vsx_xxbrd v2i64:$XB))
+    (set v1i128:$XT, (int_ppc_vsx_xxbrq v1i128:$XB))
+
+- Vector Permute: xxperm xxpermr
+  . I have checked "PPCxxswapd" in PPCInstrVSX.td, but they are different
+  . Use an intrinsic
+    (set v16i8:$XT, (int_ppc_vsx_xxperm  v16i8:$XA, v16i8:$XB))
+    (set v16i8:$XT, (int_ppc_vsx_xxpermr v16i8:$XA, v16i8:$XB))
+
+- Vector Splat Immediate Byte: xxspltib
+  . Similar to XXSPLTW:
+    def XXSPLTW : XX2Form_2<60, 164,
+                            (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
+                            "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
+
+  . No SDAG node, intrinsic, or builtin is required?
+
+- Load/Store Vector: lxv stxv
+  . Likely SDAG matches:
+    (set v?:$XT, (load ix16addr:$src))
+    (store v?:$XT, ix16addr:$dst)
+
+  . Need to define ix16addr in PPCInstrInfo.td
+    ix16addr: 16-byte aligned, see "def memrix16" in PPCInstrInfo.td
+
+- Load/Store Vector Indexed: lxvx stxvx
+  . Likely SDAG matches:
+    (set v?:$XT, (load xoaddr:$src))
+    (store v?:$XT, xoaddr:$dst)
+
+- Load/Store DWord: lxsd stxsd
+  . Similar to lxsdx/stxsdx:
+    def LXSDX : XX1Form<31, 588,
+                        (outs vsfrc:$XT), (ins memrr:$src),
+                        "lxsdx $XT, $src", IIC_LdStLFD,
+                        [(set f64:$XT, (load xoaddr:$src))]>;
+
+  . (set f64:$XT, (load ixaddr:$src))
+    (store f64:$XT, ixaddr:$dst)
+
+- Load/Store SP, with conversion from/to DP: lxssp stxssp
+  . Similar to lxsspx/stxsspx:
+    def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
+                         "lxsspx $XT, $src", IIC_LdStLFD,
+                         [(set f32:$XT, (load xoaddr:$src))]>;
+
+  . (set f32:$XT, (load ixaddr:$src))
+    (store f32:$XT, ixaddr:$dst)
+
+- Load as Integer Byte/Halfword & Zero Indexed: lxsibzx lxsihzx
+  . Similar to lxsiwzx:
+    def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
+                          "lxsiwzx $XT, $src", IIC_LdStLFD,
+                          [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
+
+  . (set f64:$XT, (PPClfiwzx xoaddr:$src))
+
+- Store as Integer Byte/Halfword Indexed: stxsibx stxsihx
+  . Similar to stxsiwx:
+    def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
+                          "stxsiwx $XT, $dst", IIC_LdStSTFD,
+                          [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
+
+  . (PPCstfiwx f64:$XT, xoaddr:$dst)
+
+- Load Vector Halfword*8/Byte*16 Indexed: lxvh8x lxvb16x
+  . Similar to lxvd2x/lxvw4x:
+    def LXVD2X : XX1Form<31, 844,
+                         (outs vsrc:$XT), (ins memrr:$src),
+                         "lxvd2x $XT, $src", IIC_LdStLFD,
+                         [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>;
+
+  . (set v8i16:$XT, (int_ppc_vsx_lxvh8x  xoaddr:$src))
+    (set v16i8:$XT, (int_ppc_vsx_lxvb16x xoaddr:$src))
+
+- Store Vector Halfword*8/Byte*16 Indexed: stxvh8x stxvb16x
+  . Similar to stxvd2x/stxvw4x:
+    def STXVD2X : XX1Form<31, 972,
+                          (outs), (ins vsrc:$XT, memrr:$dst),
+                          "stxvd2x $XT, $dst", IIC_LdStSTFD,
+                          [(store v2f64:$XT, xoaddr:$dst)]>;
+
+  . (store v8i16:$XT, xoaddr:$dst)
+    (store v16i8:$XT, xoaddr:$dst)
+
+- Load/Store Vector (Left-justified) with Length: lxvl lxvll stxvl stxvll
+  . Likely needs an intrinsic
+  . (set v?:$XT, (int_ppc_vsx_lxvl  xoaddr:$src))
+    (set v?:$XT, (int_ppc_vsx_lxvll xoaddr:$src))
+
+  . (int_ppc_vsx_stxvl  xoaddr:$dst)
+    (int_ppc_vsx_stxvll xoaddr:$dst)
+
+- Load Vector Word & Splat Indexed: lxvwsx
+  . Likely needs an intrinsic
+  . (set v?:$XT, (int_ppc_vsx_lxvwsx xoaddr:$src))
+
+Atomic operations (l[dw]at, st[dw]at):
+- Provide custom lowering for common atomic operations to use these
+  instructions with the correct Function Code
+- Ensure the operands are in the correct registers (i.e. RT+1, RT+2)
+- Provide builtins since not all FCs necessarily have an existing LLVM
+  atomic operation
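+- For the common Function Codes the source is just a C11 atomic; a minimal
+  sketch of the kind of operation a custom lowering could map to ldat/stdat
+  (the function name bump is illustrative only):
+
+    #include <stdatomic.h>
+    #include <stdint.h>
+
+    uint64_t bump(_Atomic uint64_t *ctr) {
+      /* atomicrmw add in IR; a candidate for ldat with the
+         Fetch-and-Add function code */
+      return atomic_fetch_add(ctr, 1);
+    }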
+
+Load Doubleword Monitored (ldmx):
+- Investigate whether there are any uses for this. It seems to be related to
+  Garbage Collection, so it isn't likely to be all that useful for most
+  languages we deal with.
+
+Move to CR from XER Extended (mcrxrx):
+- Is there a use for this in LLVM?
+
+Fixed Point Facility:
+
+- Copy-Paste Facility: copy copy_first cp_abort paste paste. paste_last
+  . Use intrinsics:
+    (int_ppc_copy_first i32:$rA, i32:$rB)
+    (int_ppc_copy i32:$rA, i32:$rB)
+
+    (int_ppc_paste i32:$rA, i32:$rB)
+    (int_ppc_paste_last i32:$rA, i32:$rB)
+
+    (int_cp_abort)
+
+- Message Synchronize: msgsync
+- SLB*: slbieg slbsync
+- stop
+  . No intrinsics
diff --git a/contrib/llvm/lib/Target/PowerPC/p9-instrs.txt b/contrib/llvm/lib/Target/PowerPC/p9-instrs.txt
new file mode 100644
index 000000000000..a70582aca398
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/p9-instrs.txt
@@ -0,0 +1,442 @@
+Content:
+========
+. Remaining Instructions (Total 56 Instructions, including 2 unknown instructions)
+. Done (Total 155 Instructions: 101 VSX, 54 Altivec)
+
+//------------------------------------------------------------------------------
+//. Remaining Instructions
+//------------------------------------------------------------------------------
+GCC reference: https://sourceware.org/ml/binutils/2015-11/msg00071.html
+
+// Add PC Immediate Shifted DX-form p69
+[PO RT d1 d0 XO d2] addpcis RT,D
+                    subpcis Rx,value = addpcis Rx,-value
+
+// 6.17.2 Decimal Integer Format Conversion Instructions
+
+// Decimal Convert From National VX-form p352
+[PO VRT EO VRB 1 PS XO] bcdcfn. VRT,VRB,PS
+
+// Decimal Convert From Zoned VX-form p353
+[PO VRT EO VRB 1 PS XO] bcdcfz. VRT,VRB,PS
+
+// Decimal Convert To National VX-form p354
+[PO VRT EO VRB 1 / XO] bcdctn. VRT,VRB
+
+// Decimal Convert To Zoned VX-form p355
+[PO VRT EO VRB 1 PS XO] bcdctz. VRT,VRB,PS
+
+// Decimal Convert From Signed Quadword VX-form p356
+[PO VRT EO VRB 1 PS XO] bcdcfsq. VRT,VRB,PS
+
+// Decimal Convert To Signed Quadword VX-form p356
+[PO VRT EO VRB 1 / XO] bcdctsq. VRT,VRB
+
+// 6.17.3 Decimal Integer Sign Manipulation Instructions
+
+// Decimal Copy Sign VX-form p358
+[PO VRT VRA VRB XO] bcdcpsgn. VRT,VRA,VRB
+
+// Decimal Set Sign VX-form p358
+[PO VRT EO VRB 1 PS XO] bcdsetsgn. VRT,VRB,PS
+
+// Decimal Shift VX-form p359
+[PO VRT VRA VRB 1 PS XO] bcds. VRT,VRA,VRB,PS
+
+// Decimal Unsigned Shift VX-form p360
+[PO VRT VRA VRB 1 / XO] bcdus. VRT,VRA,VRB
+
+// Decimal Shift and Round VX-form p361
+[PO VRT VRA VRB 1 PS XO] bcdsr. VRT,VRA,VRB,PS
+
+// 6.17.5 Decimal Integer Truncate Instructions
+
+// Decimal Truncate VX-form p362
+[PO VRT VRA VRB 1 PS XO] bcdtrunc. VRT,VRA,VRB,PS
+
+// Decimal Unsigned Truncate VX-form p363
+[PO VRT VRA VRB 1 / XO] bcdutrunc. VRT,VRA,VRB
+
+// 3.3.10.1 Character-Type Compare Instructions
+
+// Compare Ranged Byte X-form p87
+[PO BF / L RA RB XO /] cmprb BF,L,RA,RB
+
+// Compare Equal Byte X-form p88
+[PO BF // RA RB XO /] cmpeqb BF,RA,RB
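+
+// (What the README_P9 notes above want cmprb for: byte range checks such
+//  as isdigit-style classifiers. A C sketch; is_digit is an illustrative
+//  name:
+//
+//    int is_digit(unsigned char c) {
+//      return c >= '0' && c <= '9';  /* one cmprb range compare */
+//    }
+//  )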
+
+// 3.3.13 Fixed-Point Logical Instructions
+
+// Count Trailing Zeros Word X-form p95
+[PO RS RA /// XO Rc] cnttzw(.) RA,RS
+
+// 3.3.13.1 64-bit Fixed-Point Logical Instructions
+
+// Count Trailing Zeros Doubleword X-form p98
+[PO RS RA /// XO Rc] cnttzd(.) RA,RS
+
+// 4.4 Copy-Paste Facility
+
+// Copy X-form p858
+[PO /// L RA RB XO /] copy RA,RB,L
+                      copy_first = copy RA, RB, 1
+// CP_Abort p860
+[PO /// /// /// XO /] cp_abort
+
+// Paste p859
+[PO /// L RA RB XO Rc] paste(.) RA,RB,L
+                       paste_last = paste RA,RB,1
+
+// 3.3.9 Fixed-Point Arithmetic Instructions
+
+// Deliver A Random Number X-form p79
+[PO RT /// L /// XO /] darn RT,L
+
+// Multiply-Add High Doubleword VA-form p81
+[PO RT RA RB RC XO] maddhd RT,RA,RB,RC
+
+// Multiply-Add High Doubleword Unsigned VA-form p81
+[PO RT RA RB RC XO] maddhdu RT,RA,RB,RC
+
+// Multiply-Add Low Doubleword VA-form p81
+[PO RT RA RB RC XO] maddld RT,RA,RB,RC
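+
+// (Semantics sketch: maddhd/maddld return the high/low 64 bits of the
+//  128-bit value RA*RB+RC. In C, assuming a compiler with __int128;
+//  mulh_add is an illustrative name:
+//
+//    long long mulh_add(long long a, long long b, long long c) {
+//      __int128 v = (__int128)a * b + c;
+//      return (long long)(v >> 64);  /* maddhd; the low half = maddld */
+//    }
+//  )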
+
+// Modulo Signed Word X-form p76
+[PO RT RA RB XO /] modsw RT,RA,RB
+
+// Modulo Unsigned Word X-form p76
+[PO RT RA RB XO /] moduw RT,RA,RB
+
+// Modulo Signed Doubleword X-form p84
+[PO RT RA RB XO /] modsd RT,RA,RB
+
+// Modulo Unsigned Doubleword X-form p84
+[PO RT RA RB XO /] modud RT,RA,RB
+
+
+// DFP Test Significance Immediate [Quad] X-form p204
+[PO BF / UIM FRB XO /] dtstsfi BF,UIM,FRB
+[PO BF / UIM FRBp XO /] dtstsfiq BF,UIM,FRBp
+
+// 3.3.14.2.1 64-bit Fixed-Point Shift Instructions
+
+// Extend-Sign Word and Shift Left Immediate XS-form p109
+[PO RS RA sh XO sh Rc] extswsli(.) RA,RS,SH
+
+// 4.5.1 Load Atomic
+
+// Load Word Atomic X-form p864
+[PO RT RA FC XO /] lwat RT,RA,FC
+
+// Load Doubleword Atomic X-form p864
+[PO RT RA FC XO /] ldat RT,RA,FC
+
+// 4.5.2 Store Atomic
+
+// Store Word Atomic X-form p866
+[PO RS RA FC XO /] stwat RS,RA,FC
+
+// Store Doubleword Atomic X-form p866
+[PO RS RA FC XO /] stdat RS,RA,FC
+
+// 3.3.2.1 64-bit Fixed-Point Load Instructions
+
+// Load Doubleword Monitored Indexed X-form p54
+[PO RT RA RB XO /] ldmx RT,RA,RB
+
+// 3.3.16 Move To/From Vector-Scalar Register Instructions
+
+// Move From VSR Lower Doubleword XX1-form p111
+[PO S RA /// XO SX] mfvsrld RA,XS
+
+// Move To VSR Double Doubleword XX1-form p114
+[PO T RA RB XO TX] mtvsrdd XT,RA,RB
+
+// Move To VSR Word & Splat XX1-form p115
+[PO T RA /// XO TX] mtvsrws XT,RA
+
+// Move to CR from XER Extended X-form p119
+[PO BF // /// /// XO /] mcrxrx BF
+
+// Set Boolean X-form p121
+[PO RT BFA // /// XO /] setb RT,BFA
+
+// Message Synchronize X-form p1126
+[PO /// /// /// XO /] msgsync
+
+// SLB Invalidate Entry Global X-form p1026
+[PO RS /// RB XO /] slbieg RS,RB
+
+// SLB Synchronize X-form p1031
+[PO /// /// /// XO /] slbsync
+
+// 3.3.2.1 Power-Saving Mode Instruction
+
+// stop XL-form p957
+[PO /// /// /// XO /] stop
+
+// 4.6.4 Wait Instruction
+// Wait X-form p880
+[PO /// WC /// /// XO /] wait
+
+// Unknown Instructions:
+urfid
+- gcc's implementation:
+  {"urfid", XL(19,306), 0xffffffff, POWER9, PPCNONE, {0}},
+  (4c 00 02 64|64 02 00 4c) urfid
+
+rmieg
+- gcc's implementation:
+  {"rmieg", X(31,882), XRTRA_MASK, POWER9, PPCNONE, {RB}},
+  (7c 00 f6 e4|e4 f6 00 7c) rmieg r30
+
+//------------------------------------------------------------------------------
+//. Done:
+//------------------------------------------------------------------------------
+
+//======================================
+"vsx instructions"
+
+//--------------------------------------
+"7.6.1.2.1 VSX Scalar Move Instructions"
+// VSX Scalar Quad-Precision Move Instructions
+
+// VSX Scalar Copy Sign Quad-Precision X-form p.553
+[PO VRT VRA VRB XO /] xscpsgnqp
+
+// VSX Scalar Absolute Quad-Precision X-form 531
+// VSX Scalar Negate Quad-Precision X-form 627
+// VSX Scalar Negative Absolute Quad-Precision X-form 626
+[PO VRT XO VRB XO /] xsabsqp xsnegqp xsnabsqp
+
+//--------------------------------------
+"7.6.1.3 VSX Floating-Point Arithmetic Instructions"
+
+// VSX Scalar Quad-Precision Elementary Arithmetic
+
+// VSX Scalar Add Quad-Precision [using round to Odd] X-form 539
+// VSX Scalar Divide Quad-Precision [using round to Odd] X-form 584
+// VSX Scalar Multiply Quad-Precision [using round to Odd] X-form 622
+[PO VRT VRA VRB XO RO] xsaddqp xsaddqpo xsdivqp xsdivqpo xsmulqp xsmulqpo
+
+// VSX Scalar Square Root Quad-Precision [using round to Odd] X-form 662
+// VSX Scalar Subtract Quad-Precision [using round to Odd] X-form 667
+                       xssubqp xssubqpo
+
+[PO VRT XO VRB XO RO] xssqrtqp xssqrtqpo
+
+// VSX Scalar Quad-Precision Multiply-Add Arithmetic Instructions
+
+// VSX Scalar Multiply-Add Quad-Precision [using round to Odd] X-form 596
+// VSX Scalar Multiply-Subtract Quad-Precision [using round to Odd] X-form 617
+// VSX Scalar Negative Multiply-Add Quad-Precision [using round to Odd] X-form 636
+// VSX Scalar Negative Multiply-Subtract Quad-Precision [using round to Odd]
+// X-form 645
+[PO VRT VRA VRB XO RO] xsmaddqp xsmaddqpo xsmsubqp xsmsubqpo
+                       xsnmaddqp xsnmaddqpo xsnmsubqp xsnmsubqpo
+
+22
+//--------------------------------------
+"7.6.1.4 VSX Floating-Point Compare Instructions"
+
+// VSX Scalar Quad-Precision Compare Instructions
+
+// VSX Scalar Compare Ordered Quad-Precision X-form 549
+// VSX Scalar Compare Unordered Quad-Precision X-form 552
+[PO BF // VRA VRB XO /] xscmpoqp xscmpuqp
+
+"7.6.1.8 VSX Scalar Floating-Point Support Instructions"
+// VSX Scalar Compare Exponents Quad-Precision X-form p. 541 542
+[PO BF // A B XO AX BX /] xscmpexpdp
+[PO BF // VRA VRB XO /] xscmpexpqp
+
+// VSX Scalar Compare DP, XX3-form, p.543 544 545
+// VSX Scalar Compare Equal Double-Precision,
+[PO T A B XO AX BX TX] xscmpeqdp xscmpgedp xscmpgtdp xscmpnedp
+
+// VSX Vector Compare Not Equal Double-Precision XX3-form 691
+[PO T A B Rc XO AX BX TX] xvcmpnedp xvcmpnedp. xvcmpnesp xvcmpnesp.
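+
+// (Source-level shape for the compare-not-equal family, sketched with
+//  GCC/Clang vector extensions; element-wise != yields a per-lane
+//  all-ones/all-zeros mask. Type and function names are illustrative:
+//
+//    typedef float v4sf __attribute__((vector_size(16)));
+//    typedef int   v4si __attribute__((vector_size(16)));
+//
+//    v4si ne(v4sf a, v4sf b) { return a != b; }  /* xvcmpnesp-like */
+//  )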
+
+//--------------------------------------
+"7.6.1.5 VSX FP-FP Conversion Instructions"
+// VSX Scalar Quad-Precision Floating-Point Conversion Instructions
+
+// VSX Scalar round & Convert Quad-Precision format to Double-Precision format
+// [using round to Odd] X-form 567
+[PO VRT XO VRB XO /] xscvqpdp xscvqpdpo (actually [PO VRT XO VRB XO RO])
+[PO VRT XO VRB XO /] xscvdpqp
+
+// VSX Scalar Quad-Precision Convert to Integer Instructions
+
+// VSX Scalar truncate & Convert Quad-Precision format to Signed Doubleword format
+// 568 570 572 574
+[PO VRT XO VRB XO /] xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz
+576 = 580 xscvsdqp xscvudqp
+
+"7.6.1.7 VSX Round to Floating-Point Integer Instructions"
+// VSX Scalar round & Convert Double-Precision format to Half-Precision format
+// XX2-form 554 566
+[PO T XO B XO BX TX] xscvdphp xscvhpdp
+
+// VSX Vector Convert Half-Precision format to Single-Precision format
+// XX2-form 703 705
+[PO T XO B XO BX TX] xvcvhpsp xvcvsphp
+
+// VSX Scalar Round to Quad-Precision Integer [with Inexact] Z23-form 654
+[PO VRT /// R VRB RMC XO EX] xsrqpi xsrqpix
+
+// VSX Scalar Round Quad-Precision to Double-Extended Precision Z23-form 656
+[PO VRT /// R VRB RMC XO /] xsrqpxp
+def XSRQPXP : Z23Form_1<63, 37,
+                        (outs vrrc:$vT), (ins u5imm:$R, vrrc:$vB, u2imm:$RMC),
+                        "xsrqpxp $vT, $R, $vB, $RMC", IIC_VecFP, []>;
+
+27~28
+//--------------------------------------
+// VSX Scalar Insert Exponent Double-Precision X-form 588
+// VSX Scalar Insert Exponent Quad-Precision X-form 589
+[PO VT rA rB XO /] xsiexpdp
+[PO VRT VRA VRB XO /] xsiexpqp
+
+// VSX Vector Insert Exponent Double-Precision XX3-form 722
+[PO T A B XO AX BX TX] xviexpdp xviexpsp
+
+// VSX Vector Extract Unsigned Word XX2-form 788
+// VSX Vector Insert Word XX2-form
+[PO T / UIM B XO BX TX] xxextractuw xxinsertw
+
+// VSX Scalar Extract Exponent Double-Precision XX2-form 676
+[PO BF DCMX B XO BX /]
+[PO T XO B XO BX /] xsxexpdp xsxsigdp
+// X-form
+[PO VRT XO VRB XO /] xsxexpqp xsxsigqp
+
+// VSX Vector Extract Exponent Double-Precision XX2-form 784
+[PO T XO B XO BX TX] xvxexpdp xvxexpsp
+
+// VSX Vector Extract Significand Double-Precision XX2-form 785
+[PO T XO B XO BX TX] xvxsigdp xvxsigsp
+
+//--------------------------------------
+// VSX Scalar Test Data Class Double-Precision XX2-form p673
+// VSX Scalar Test Data Class Quad-Precision X-form 674
+// VSX Scalar Test Data Class Single-Precision XX2-form 675
+[PO BF DCMX B XO BX /] xststdcdp xststdcsp
+[PO BF DCMX VRB XO /] xststdcqp
+
+// VSX Vector Test Data Class Double-Precision XX2-form 782 783
+[PO T dx B XO dc XO dm BX TX] xvtstdcdp xvtstdcsp
+
+//--------------------------------------
+// VSX Scalar Maximum Type-C Double-Precision XX3-form 601 ~ 609
+[PO T A B XO AX BX TX] xsmaxcdp xsmaxjdp xsmincdp xsminjdp
+
+//--------------------------------------
+// VSX Vector Byte-Reverse Doubleword XX2-form 786 787
+[PO T XO B XO BX TX] xxbrd xxbrh xxbrq xxbrw
+
+// VSX Vector Permute XX3-form 794
+[PO T A B XO AX BX TX] xxperm xxpermr
+
+// VSX Vector Splat Immediate Byte 796 x-form
+[PO T EO IMM8 XO TX] xxspltib <= signed or unsigned?
+
+30
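+
+// (The xxbr* entries above are element-wise byte reverses; the per-element
+//  C equivalent is a bswap, e.g. for xxbrd's two doublewords. brd2 is an
+//  illustrative name:
+//
+//    #include <stdint.h>
+//
+//    void brd2(uint64_t v[2]) {
+//      v[0] = __builtin_bswap64(v[0]); /* GCC/Clang builtin */
+//      v[1] = __builtin_bswap64(v[1]);
+//    }
+//  )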
+
+//--------------------------------------
+// Load VSX Vector DQ-form 511
+[PO T RA DQ TX XO] lxv
+
+// Store VSX Vector DQ-form 526
+[PO S RA DQ SX XO] stxv
+
+// Load VSX Scalar Doubleword DS-form 499
+// Load VSX Scalar Single DS-form 504
+[PO VRT RA DS XO] lxsd lxssp
+
+// Store VSX Scalar Doubleword DS-form 517
+// Store VSX Scalar Single DS-form 520
+[PO VRT RA DS XO] stxsd stxssp
+
+
+// Load VSX Vector Indexed X-form 511
+// Load VSX Scalar as Integer Byte & Zero Indexed X-form 501
+// Load VSX Vector Byte*16 Indexed X-form 506
+// Load VSX Vector with Length X-form 508
+// Load VSX Vector Left-justified with Length X-form 510
+// Load VSX Vector Halfword*8 Indexed X-form 514
+// Load VSX Vector Word & Splat Indexed X-form 516
+[PO T RA RB XO TX] lxvx lxsibzx lxsihzx lxvb16x lxvl lxvll lxvh8x lxvwsx
+
+// Store VSX Scalar as Integer Byte Indexed X-form 518
+// Store VSX Scalar as Integer Halfword Indexed X-form 518
+// Store VSX Vector Byte*16 Indexed X-form 522
+// Store VSX Vector Halfword*8 Indexed X-form 524
+// Store VSX Vector with Length X-form 526
+// Store VSX Vector Left-justified with Length X-form 528
+// Store VSX Vector Indexed X-form 529
+[PO S RA RB XO SX] stxsibx stxsihx stxvb16x stxvh8x stxvl stxvll stxvx
+
+21
+
+//--------------------------------------
+". vector instructions"
+
+[1] PowerISA-v3.0 p.933 - Table 1, and Chapter 6. Vector Facility (altivec)
+[2] https://sourceware.org/ml/binutils/2015-11/msg00071.html
+
+//--------------------------------------
+New patch:
+// vector bit, p.367, 6.16 Vector Bit Permute Instruction
+[PO VRT VRA VRB XO] vbpermd, (existing: vbpermq)
+
+// vector permute, p.280
+[PO VRT VRA VRB VRC XO] vpermr
+
+// vector rotate left, p.341
+[PO VRT VRA VRB XO] vrlwnm vrlwmi vrldnm vrldmi
+
+// vector shift, p.285
+[PO VRT VRA VRB XO] vslv vsrv
+
+// vector multiply-by-10, p.375
+[PO VRT VRA /// XO] vmul10cuq vmul10uq
+[PO VRT VRA VRB XO] vmul10ecuq vmul10euq
+
+12
+//--------------------------------------
+http://reviews.llvm.org/D15887 + ext + neg + prty - vbpermd
+// vector count leading/trailing zero
+. new vx-form: p.31, 1.6.14 VX-FORM
+[PO RT EO VRB XO] vclzlsbb vctzlsbb (p.363)
+
+// Vector Count Trailing Zeros Instructions, 362
+[PO VRT EO VRB XO] vctzb vctzh vctzw vctzd (v16i8 v8i16 v4i32 v2i64)
+
+// vector extend sign (p.314)
+[PO VRT EO VRB XO] vextsb2w vextsh2w vextsb2d vextsh2d vextsw2d
+
+// vector negate, p.313
+[PO VRT EO VRB XO] vnegd vnegw
+
+// vector parity, p.335
+[PO VRT EO VRB XO] vprtybd vprtybq vprtybw
+
+16
+//--------------------------------------
+// vector compare, p.330
+[PO VRT VRA VRB RC XO] vcmpneb vcmpneb. vcmpneh vcmpneh. vcmpnew vcmpnew.
+                       vcmpnezb vcmpnezb. vcmpnezh vcmpnezh. vcmpnezw vcmpnezw.
+12
+//--------------------------------------
+http://reviews.llvm.org/D15917 + insert
+// vector extract (p.287) ref: vspltb (v2.07, p.227)
+// vector insert, p.288
+[PO VRT / UIM VRB XO] vinsertb vinsertd vinserth vinsertw
+
+// Vector Extract Unsigned
+[PO VRT / UIM VRB XO] vextractub vextractuh vextractuw vextractd
+
+// p.364: Vector Extract Unsigned Left/Right-Indexed
+[PO RT RA VRB XO] vextublx vextubrx vextuhlx vextuhrx vextuwlx vextuwrx
+
+14
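+
+// (The Left/Right-Indexed extracts above take the byte index in a GPR,
+//  i.e. a runtime-variable lane access. A C sketch with GCC/Clang vector
+//  extensions; type and function names are illustrative:
+//
+//    typedef unsigned int v4ui __attribute__((vector_size(16)));
+//
+//    unsigned int lane(v4ui v, unsigned i) {
+//      return v[i & 3];  /* a candidate for vextuwlx/vextuwrx patterns */
+//    }
+//  )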