diff options
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC')
35 files changed, 1315 insertions, 518 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 5847b3a52bfc..4863ac542736 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -114,7 +114,7 @@ public: } void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel) const override { + uint64_t Value, bool IsPCRel, MCContext &Ctx) const override { Value = adjustFixupValue(Fixup.getKind(), Value); if (!Value) return; // Doesn't change encoding. diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 017d21af08a8..a00b56af0490 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -11,22 +11,28 @@ // //===----------------------------------------------------------------------===// -#include "PPCInstrInfo.h" -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "PPCInstrInfo.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetOpcodes.h" +#include <cassert> +#include <cstdint> + using namespace llvm; #define DEBUG_TYPE "mccodeemitter" @@ -34,10 +40,8 @@ using namespace llvm; STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); namespace { -class PPCMCCodeEmitter : public MCCodeEmitter { - PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete; - void operator=(const PPCMCCodeEmitter &) = delete; +class PPCMCCodeEmitter : public MCCodeEmitter { const MCInstrInfo &MCII; const MCContext &CTX; bool IsLittleEndian; @@ -46,8 +50,9 @@ public: PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : MCII(mcii), CTX(ctx), IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {} - - ~PPCMCCodeEmitter() override {} + PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete; + void operator=(const PPCMCCodeEmitter &) = delete; + ~PPCMCCodeEmitter() override = default; unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, @@ -103,6 +108,7 @@ public: uint64_t getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; + void encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override { @@ -137,7 +143,7 @@ public: } break; default: - llvm_unreachable ("Invalid instruction size"); + llvm_unreachable("Invalid instruction size"); } ++MCNumEmitted; // Keep track of the # of mi's emitted. @@ -238,7 +244,6 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, return RegBits; } - unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { @@ -286,7 +291,6 @@ unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo, return reverseBits(Imm | RegBits) >> 22; } - unsigned PPCMCCodeEmitter::getSPE4DisEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) @@ -302,7 +306,6 @@ unsigned PPCMCCodeEmitter::getSPE4DisEncoding(const MCInst &MI, unsigned OpNo, return reverseBits(Imm | RegBits) >> 22; } - unsigned PPCMCCodeEmitter::getSPE2DisEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) @@ -318,7 +321,6 @@ unsigned PPCMCCodeEmitter::getSPE2DisEncoding(const MCInst &MI, unsigned OpNo, return reverseBits(Imm | RegBits) >> 22; } - unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { @@ -383,7 +385,5 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, return MO.getImm(); } - - #define ENABLE_INSTR_PREDICATE_VERIFIER #include "PPCGenMCCodeEmitter.inc" diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index bbd10e5b260f..2d686f227919 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -11,22 +11,29 @@ // //===----------------------------------------------------------------------===// -#include "PPCMCTargetDesc.h" #include "InstPrinter/PPCInstPrinter.h" -#include "PPCMCAsmInfo.h" +#include "MCTargetDesc/PPCMCAsmInfo.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPCTargetStreamer.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -41,9 +48,10 @@ using namespace llvm; #include "PPCGenRegisterInfo.inc" // Pin the vtable to this file. -PPCTargetStreamer::~PPCTargetStreamer() {} PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} +PPCTargetStreamer::~PPCTargetStreamer() = default; + static MCInstrInfo *createPPCMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitPPCMCInstrInfo(X); @@ -96,12 +104,14 @@ static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM, } namespace { + class PPCTargetAsmStreamer : public PPCTargetStreamer { formatted_raw_ostream &OS; public: PPCTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS) : PPCTargetStreamer(S), OS(OS) {} + void emitTCEntry(const MCSymbol &S) override { OS << "\t.tc "; OS << S.getName(); @@ -109,12 +119,15 @@ public: OS << S.getName(); OS << '\n'; } + void emitMachine(StringRef CPU) override { OS << "\t.machine " << CPU << '\n'; } + void emitAbiVersion(int AbiVersion) override { OS << "\t.abiversion " << AbiVersion << '\n'; } + void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override { const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo(); @@ -129,18 +142,22 @@ public: class PPCTargetELFStreamer : public PPCTargetStreamer { public: PPCTargetELFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {} + MCELFStreamer &getStreamer() { return static_cast<MCELFStreamer &>(Streamer); } + void emitTCEntry(const MCSymbol &S) override { // Creates a R_PPC64_TOC relocation Streamer.EmitValueToAlignment(8); Streamer.EmitSymbolValue(&S, 8); } + void emitMachine(StringRef CPU) override { // FIXME: Is there anything to do in here or does this directive only // limit the parser? } + void emitAbiVersion(int AbiVersion) override { MCAssembler &MCA = getStreamer().getAssembler(); unsigned Flags = MCA.getELFHeaderEFlags(); @@ -148,6 +165,7 @@ public: Flags |= (AbiVersion & ELF::EF_PPC64_ABI); MCA.setELFHeaderEFlags(Flags); } + void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override { MCAssembler &MCA = getStreamer().getAssembler(); @@ -170,6 +188,7 @@ public: if ((Flags & ELF::EF_PPC64_ABI) == 0) MCA.setELFHeaderEFlags(Flags | 2); } + void emitAssignment(MCSymbol *S, const MCExpr *Value) override { auto *Symbol = cast<MCSymbolELF>(S); // When encoding an assignment to set symbol A to symbol B, also copy @@ -188,21 +207,26 @@ public: class PPCTargetMachOStreamer : public PPCTargetStreamer { public: PPCTargetMachOStreamer(MCStreamer &S) : PPCTargetStreamer(S) {} + void emitTCEntry(const MCSymbol &S) override { llvm_unreachable("Unknown pseudo-op: .tc"); } + void emitMachine(StringRef CPU) override { // FIXME: We should update the CPUType, CPUSubType in the Object file if // the new values are different from the defaults. } + void emitAbiVersion(int AbiVersion) override { llvm_unreachable("Unknown pseudo-op: .abiversion"); } + void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override { llvm_unreachable("Unknown pseudo-op: .localentry"); } }; -} + +} // end anonymous namespace static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 0989e0c8e268..893233ee2300 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -17,23 +17,22 @@ // GCC #defines PPC on Linux but we use it as our namespace name #undef PPC -#include "llvm/Support/DataTypes.h" #include "llvm/Support/MathExtras.h" +#include <cstdint> namespace llvm { + class MCAsmBackend; class MCCodeEmitter; class MCContext; class MCInstrInfo; class MCObjectWriter; class MCRegisterInfo; -class MCSubtargetInfo; class MCTargetOptions; class Target; class Triple; class StringRef; class raw_pwrite_stream; -class raw_ostream; Target &getThePPC32Target(); Target &getThePPC64Target(); @@ -83,7 +82,7 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { return false; } -} // End llvm namespace +} // end namespace llvm // Generated files will use "namespace PPC". To avoid symbol clash, // undefine PPC here. PPC may be predefined on some hosts. @@ -103,4 +102,4 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { #define GET_SUBTARGETINFO_ENUM #include "PPCGenSubtargetInfo.inc" -#endif +#endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h index e01f49dce81e..38ae62b26757 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.h +++ b/contrib/llvm/lib/Target/PowerPC/PPC.h @@ -45,11 +45,13 @@ namespace llvm { FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCTLSDynamicCallPass(); FunctionPass *createPPCBoolRetToIntPass(); + FunctionPass *createPPCExpandISELPass(); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); void initializePPCVSXFMAMutatePass(PassRegistry&); void initializePPCBoolRetToIntPass(PassRegistry&); + void initializePPCExpandISELPass(PassRegistry &); extern char &PPCVSXFMAMutateID; namespace PPCII { diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index f0e0ebc4946c..1f181d007f63 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -112,7 +112,9 @@ public: void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK); bool runOnMachineFunction(MachineFunction &MF) override { Subtarget = &MF.getSubtarget<PPCSubtarget>(); - return AsmPrinter::runOnMachineFunction(MF); + bool Changed = AsmPrinter::runOnMachineFunction(MF); + emitXRayTable(); + return Changed; } }; @@ -134,6 +136,7 @@ public: void EmitFunctionBodyStart() override; void EmitFunctionBodyEnd() override; + void EmitInstruction(const MachineInstr *MI) override; }; /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac @@ -402,7 +405,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) { .addImm(CallTarget & 0xFFFF)); // Save the current TOC pointer before the remote call. - int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40; + int TOCSaveOffset = Subtarget->getFrameLowering()->getTOCSaveOffset(); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::STD) .addReg(PPC::X2) .addImm(TOCSaveOffset) @@ -1046,6 +1049,97 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, TmpInst); } +void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) { + if (!Subtarget->isPPC64()) + return PPCAsmPrinter::EmitInstruction(MI); + + switch (MI->getOpcode()) { + default: + return PPCAsmPrinter::EmitInstruction(MI); + case TargetOpcode::PATCHABLE_FUNCTION_ENTER: { + // .begin: + // b .end # lis 0, FuncId[16..32] + // nop # li 0, FuncId[0..15] + // std 0, -8(1) + // mflr 0 + // bl __xray_FunctionEntry + // mtlr 0 + // .end: + // + // Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when number + // of instructions change. + MCSymbol *BeginOfSled = OutContext.createTempSymbol(); + MCSymbol *EndOfSled = OutContext.createTempSymbol(); + OutStreamer->EmitLabel(BeginOfSled); + EmitToStreamer(*OutStreamer, + MCInstBuilder(PPC::B).addExpr( + MCSymbolRefExpr::create(EndOfSled, OutContext))); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP)); + EmitToStreamer( + *OutStreamer, + MCInstBuilder(PPC::STD).addReg(PPC::X0).addImm(-8).addReg(PPC::X1)); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR8).addReg(PPC::X0)); + EmitToStreamer(*OutStreamer, + MCInstBuilder(PPC::BL8_NOP) + .addExpr(MCSymbolRefExpr::create( + OutContext.getOrCreateSymbol("__xray_FunctionEntry"), + OutContext))); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR8).addReg(PPC::X0)); + OutStreamer->EmitLabel(EndOfSled); + recordSled(BeginOfSled, *MI, SledKind::FUNCTION_ENTER); + break; + } + case TargetOpcode::PATCHABLE_FUNCTION_EXIT: { + // .p2align 3 + // .begin: + // b(lr)? # lis 0, FuncId[16..32] + // nop # li 0, FuncId[0..15] + // std 0, -8(1) + // mflr 0 + // bl __xray_FunctionExit + // mtlr 0 + // .end: + // b(lr)? + // + // Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when number + // of instructions change. + const MachineInstr *Next = [&] { + MachineBasicBlock::const_iterator It(MI); + assert(It != MI->getParent()->end()); + ++It; + assert(It->isReturn()); + return &*It; + }(); + OutStreamer->EmitCodeAlignment(8); + MCSymbol *BeginOfSled = OutContext.createTempSymbol(); + OutStreamer->EmitLabel(BeginOfSled); + MCInst TmpInst; + LowerPPCMachineInstrToMCInst(Next, TmpInst, *this, false); + EmitToStreamer(*OutStreamer, TmpInst); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP)); + EmitToStreamer( + *OutStreamer, + MCInstBuilder(PPC::STD).addReg(PPC::X0).addImm(-8).addReg(PPC::X1)); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR8).addReg(PPC::X0)); + EmitToStreamer(*OutStreamer, + MCInstBuilder(PPC::BL8_NOP) + .addExpr(MCSymbolRefExpr::create( + OutContext.getOrCreateSymbol("__xray_FunctionExit"), + OutContext))); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR8).addReg(PPC::X0)); + recordSled(BeginOfSled, *MI, SledKind::FUNCTION_EXIT); + break; + } + case TargetOpcode::PATCHABLE_TAIL_CALL: + case TargetOpcode::PATCHABLE_RET: + // PPC's tail call instruction, e.g. PPC::TCRETURNdi8, doesn't really + // lower to a PPC::B instruction. The PPC::B instruction is generated + // before it, and handled by the normal case. + llvm_unreachable("Tail call is handled in the normal case. See comments" + "around this assert."); + } +} + void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) { if (static_cast<const PPCTargetMachine &>(TM).isELFv2ABI()) { PPCTargetStreamer *TS = diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp index ae76386fdfb6..b7d3154d0000 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -78,7 +78,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { BlockSizes.resize(Fn.getNumBlockIDs()); auto GetAlignmentAdjustment = - [TII](MachineBasicBlock &MBB, unsigned Offset) -> unsigned { + [](MachineBasicBlock &MBB, unsigned Offset) -> unsigned { unsigned Align = MBB.getAlignment(); if (!Align) return 0; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp index 2c62a0f1d909..70c4170653ae 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -298,15 +298,17 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { return true; else continue; // ISD::FCOPYSIGN is never a library call. - case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; - case Intrinsic::floor: Opcode = ISD::FFLOOR; break; - case Intrinsic::ceil: Opcode = ISD::FCEIL; break; - case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; - case Intrinsic::rint: Opcode = ISD::FRINT; break; - case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; - case Intrinsic::round: Opcode = ISD::FROUND; break; - case Intrinsic::minnum: Opcode = ISD::FMINNUM; break; - case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break; + case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; + case Intrinsic::floor: Opcode = ISD::FFLOOR; break; + case Intrinsic::ceil: Opcode = ISD::FCEIL; break; + case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; + case Intrinsic::rint: Opcode = ISD::FRINT; break; + case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + case Intrinsic::round: Opcode = ISD::FROUND; break; + case Intrinsic::minnum: Opcode = ISD::FMINNUM; break; + case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break; + case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break; + case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break; } } @@ -315,7 +317,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { // (i.e. soft float or atomics). If adapting for targets that do, // additional care is required here. - LibFunc::Func Func; + LibFunc Func; if (!F->hasLocalLinkage() && F->hasName() && LibInfo && LibInfo->getLibFunc(F->getName(), Func) && LibInfo->hasOptimizedCodeGen(Func)) { @@ -329,50 +331,50 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { switch (Func) { default: return true; - case LibFunc::copysign: - case LibFunc::copysignf: + case LibFunc_copysign: + case LibFunc_copysignf: continue; // ISD::FCOPYSIGN is never a library call. - case LibFunc::copysignl: + case LibFunc_copysignl: return true; - case LibFunc::fabs: - case LibFunc::fabsf: - case LibFunc::fabsl: + case LibFunc_fabs: + case LibFunc_fabsf: + case LibFunc_fabsl: continue; // ISD::FABS is never a library call. - case LibFunc::sqrt: - case LibFunc::sqrtf: - case LibFunc::sqrtl: + case LibFunc_sqrt: + case LibFunc_sqrtf: + case LibFunc_sqrtl: Opcode = ISD::FSQRT; break; - case LibFunc::floor: - case LibFunc::floorf: - case LibFunc::floorl: + case LibFunc_floor: + case LibFunc_floorf: + case LibFunc_floorl: Opcode = ISD::FFLOOR; break; - case LibFunc::nearbyint: - case LibFunc::nearbyintf: - case LibFunc::nearbyintl: + case LibFunc_nearbyint: + case LibFunc_nearbyintf: + case LibFunc_nearbyintl: Opcode = ISD::FNEARBYINT; break; - case LibFunc::ceil: - case LibFunc::ceilf: - case LibFunc::ceill: + case LibFunc_ceil: + case LibFunc_ceilf: + case LibFunc_ceill: Opcode = ISD::FCEIL; break; - case LibFunc::rint: - case LibFunc::rintf: - case LibFunc::rintl: + case LibFunc_rint: + case LibFunc_rintf: + case LibFunc_rintl: Opcode = ISD::FRINT; break; - case LibFunc::round: - case LibFunc::roundf: - case LibFunc::roundl: + case LibFunc_round: + case LibFunc_roundf: + case LibFunc_roundl: Opcode = ISD::FROUND; break; - case LibFunc::trunc: - case LibFunc::truncf: - case LibFunc::truncl: + case LibFunc_trunc: + case LibFunc_truncf: + case LibFunc_truncl: Opcode = ISD::FTRUNC; break; - case LibFunc::fmin: - case LibFunc::fminf: - case LibFunc::fminl: + case LibFunc_fmin: + case LibFunc_fminf: + case LibFunc_fminl: Opcode = ISD::FMINNUM; break; - case LibFunc::fmax: - case LibFunc::fmaxf: - case LibFunc::fmaxl: + case LibFunc_fmax: + case LibFunc_fmaxf: + case LibFunc_fmaxl: Opcode = ISD::FMAXNUM; break; } } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp new file mode 100644 index 000000000000..ebd414baf1d2 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp @@ -0,0 +1,458 @@ +//===------------- PPCExpandISEL.cpp - Expand ISEL instruction ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A pass that expands the ISEL instruction into an if-then-else sequence. +// This pass must be run post-RA since all operands must be physical registers. +// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "PPCInstrInfo.h" +#include "PPCSubtarget.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "ppc-expand-isel" + +STATISTIC(NumExpanded, "Number of ISEL instructions expanded"); +STATISTIC(NumRemoved, "Number of ISEL instructions removed"); +STATISTIC(NumFolded, "Number of ISEL instructions folded"); + +// If -ppc-gen-isel=false is set, we will disable generating the ISEL +// instruction on all PPC targets. Otherwise, if the user set option +// -misel or the platform supports ISEL by default, still generate the +// ISEL instruction, else expand it. +static cl::opt<bool> + GenerateISEL("ppc-gen-isel", + cl::desc("Enable generating the ISEL instruction."), + cl::init(true), cl::Hidden); + +namespace { +class PPCExpandISEL : public MachineFunctionPass { + DebugLoc dl; + MachineFunction *MF; + const TargetInstrInfo *TII; + bool IsTrueBlockRequired; + bool IsFalseBlockRequired; + MachineBasicBlock *TrueBlock; + MachineBasicBlock *FalseBlock; + MachineBasicBlock *NewSuccessor; + MachineBasicBlock::iterator TrueBlockI; + MachineBasicBlock::iterator FalseBlockI; + + typedef SmallVector<MachineInstr *, 4> BlockISELList; + typedef SmallDenseMap<int, BlockISELList> ISELInstructionList; + + // A map of MBB numbers to their lists of contained ISEL instructions. + ISELInstructionList ISELInstructions; + + /// Initialize the object. + void initialize(MachineFunction &MFParam); + + void handleSpecialCases(BlockISELList &BIL, MachineBasicBlock *MBB); + void reorganizeBlockLayout(BlockISELList &BIL, MachineBasicBlock *MBB); + void populateBlocks(BlockISELList &BIL); + void expandMergeableISELs(BlockISELList &BIL); + void expandAndMergeISELs(); + + bool canMerge(MachineInstr *PrevPushedMI, MachineInstr *MI); + + /// Is this instruction an ISEL or ISEL8? + static bool isISEL(const MachineInstr &MI) { + return (MI.getOpcode() == PPC::ISEL || MI.getOpcode() == PPC::ISEL8); + } + + /// Is this instruction an ISEL8? + static bool isISEL8(const MachineInstr &MI) { + return (MI.getOpcode() == PPC::ISEL8); + } + + /// Are the two operands using the same register? + bool useSameRegister(const MachineOperand &Op1, const MachineOperand &Op2) { + return (Op1.getReg() == Op2.getReg()); + } + + /// + /// Collect all ISEL instructions from the current function. + /// + /// Walk the current function and collect all the ISEL instructions that are + /// found. The instructions are placed in the ISELInstructions vector. + /// + /// \return true if any ISEL instructions were found, false otherwise + /// + bool collectISELInstructions(); + +public: + static char ID; + PPCExpandISEL() : MachineFunctionPass(ID) { + initializePPCExpandISELPass(*PassRegistry::getPassRegistry()); + } + + /// + /// Determine whether to generate the ISEL instruction or expand it. + /// + /// Expand ISEL instruction into if-then-else sequence when one of + /// the following two conditions hold: + /// (1) -ppc-gen-isel=false + /// (2) hasISEL() return false + /// Otherwise, still generate ISEL instruction. + /// The -ppc-gen-isel option is set to true by default. Which means the ISEL + /// instruction is still generated by default on targets that support them. + /// + /// \return true if ISEL should be expanded into if-then-else code sequence; + /// false if ISEL instruction should be generated, i.e. not expaned. + /// + static bool isExpandISELEnabled(const MachineFunction &MF); + +#ifndef NDEBUG + void DumpISELInstructions() const; +#endif + + bool runOnMachineFunction(MachineFunction &MF) override { + if (!isExpandISELEnabled(MF)) + return false; + + DEBUG(dbgs() << "Function: "; MF.dump(); dbgs() << "\n"); + initialize(MF); + + if (!collectISELInstructions()) { + DEBUG(dbgs() << "No ISEL instructions in this function\n"); + return false; + } + +#ifndef NDEBUG + DumpISELInstructions(); +#endif + + expandAndMergeISELs(); + + return true; + } +}; +} // end anonymous namespace + +void PPCExpandISEL::initialize(MachineFunction &MFParam) { + MF = &MFParam; + TII = MF->getSubtarget().getInstrInfo(); + ISELInstructions.clear(); +} + +bool PPCExpandISEL::isExpandISELEnabled(const MachineFunction &MF) { + return !GenerateISEL || !MF.getSubtarget<PPCSubtarget>().hasISEL(); +} + +bool PPCExpandISEL::collectISELInstructions() { + for (MachineBasicBlock &MBB : *MF) { + BlockISELList thisBlockISELs; + for (MachineInstr &MI : MBB) + if (isISEL(MI)) + thisBlockISELs.push_back(&MI); + if (!thisBlockISELs.empty()) + ISELInstructions.insert(std::make_pair(MBB.getNumber(), thisBlockISELs)); + } + return !ISELInstructions.empty(); +} + +#ifndef NDEBUG +void PPCExpandISEL::DumpISELInstructions() const { + for (const auto &I : ISELInstructions) { + DEBUG(dbgs() << "BB#" << I.first << ":\n"); + for (const auto &VI : I.second) + DEBUG(dbgs() << " "; VI->print(dbgs())); + } +} +#endif + +/// Contiguous ISELs that have the same condition can be merged. +bool PPCExpandISEL::canMerge(MachineInstr *PrevPushedMI, MachineInstr *MI) { + // Same Condition Register? + if (!useSameRegister(PrevPushedMI->getOperand(3), MI->getOperand(3))) + return false; + + MachineBasicBlock::iterator PrevPushedMBBI = *PrevPushedMI; + MachineBasicBlock::iterator MBBI = *MI; + return (std::prev(MBBI) == PrevPushedMBBI); // Contiguous ISELs? +} + +void PPCExpandISEL::expandAndMergeISELs() { + for (auto &BlockList : ISELInstructions) { + DEBUG(dbgs() << "Expanding ISEL instructions in BB#" << BlockList.first + << "\n"); + + BlockISELList &CurrentISELList = BlockList.second; + auto I = CurrentISELList.begin(); + auto E = CurrentISELList.end(); + + while (I != E) { + BlockISELList SubISELList; + + SubISELList.push_back(*I++); + + // Collect the ISELs that can be merged together. + while (I != E && canMerge(SubISELList.back(), *I)) + SubISELList.push_back(*I++); + + expandMergeableISELs(SubISELList); + } + } +} + +void PPCExpandISEL::handleSpecialCases(BlockISELList &BIL, + MachineBasicBlock *MBB) { + IsTrueBlockRequired = false; + IsFalseBlockRequired = false; + + auto MI = BIL.begin(); + while (MI != BIL.end()) { + assert(isISEL(**MI) && "Expecting an ISEL instruction"); + DEBUG(dbgs() << "ISEL: " << **MI << "\n"); + + MachineOperand &Dest = (*MI)->getOperand(0); + MachineOperand &TrueValue = (*MI)->getOperand(1); + MachineOperand &FalseValue = (*MI)->getOperand(2); + + // If at least one of the ISEL instructions satisfy the following + // condition, we need the True Block: + // The Dest Register and True Value Register are not the same + // Similarly, if at least one of the ISEL instructions satisfy the + // following condition, we need the False Block: + // The Dest Register and False Value Register are not the same. + + bool IsADDIInstRequired = !useSameRegister(Dest, TrueValue); + bool IsORIInstRequired = !useSameRegister(Dest, FalseValue); + + // Special case 1, all registers used by ISEL are the same one. + if (!IsADDIInstRequired && !IsORIInstRequired) { + DEBUG(dbgs() << "Remove redudant ISEL instruction."); + NumRemoved++; + (*MI)->eraseFromParent(); + // Setting MI to the erase result keeps the iterator valid and increased. + MI = BIL.erase(MI); + continue; + } + + // Special case 2, the two input registers used by ISEL are the same. + // Note 1: We favor merging ISEL expansions over folding a single one. If + // the passed list has multiple merge-able ISEL's, we won't fold any. + // Note 2: There is no need to test for PPC::R0/PPC::X0 because PPC::ZERO/ + // PPC::ZERO8 will be used for the first operand if the value is meant to + // be zero. In this case, the useSameRegister method will return false, + // thereby preventing this ISEL from being folded. + + if (useSameRegister(TrueValue, FalseValue) && (BIL.size() == 1)) { + DEBUG(dbgs() << "Fold the ISEL instruction to an unconditonal copy."); + NumFolded++; + BuildMI(*MBB, (*MI), dl, TII->get(isISEL8(**MI) ? PPC::ADDI8 : PPC::ADDI)) + .add(Dest) + .add(TrueValue) + .add(MachineOperand::CreateImm(0)); + (*MI)->eraseFromParent(); + // Setting MI to the erase result keeps the iterator valid and increased. + MI = BIL.erase(MI); + continue; + } + + IsTrueBlockRequired |= IsADDIInstRequired; + IsFalseBlockRequired |= IsORIInstRequired; + MI++; + } +} + +void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL, + MachineBasicBlock *MBB) { + if (BIL.empty()) + return; + + assert((IsTrueBlockRequired || IsFalseBlockRequired) && + "Should have been handled by special cases earlier!"); + + MachineBasicBlock *Successor = nullptr; + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + MachineBasicBlock::iterator MBBI = (*BIL.back()); + NewSuccessor = (MBBI != MBB->getLastNonDebugInstr() || !MBB->canFallThrough()) + // Another BB is needed to move the instructions that + // follow this ISEL. If the ISEL is the last instruction + // in a block that can't fall through, we also need a block + // to branch to. + ? MF->CreateMachineBasicBlock(LLVM_BB) + : nullptr; + + MachineFunction::iterator It = MBB->getIterator(); + ++It; // Point to the successor block of MBB. + + // If NewSuccessor is NULL then the last ISEL in this group is the last + // non-debug instruction in this block. Find the fall-through successor + // of this block to use when updating the CFG below. + if (!NewSuccessor) { + for (auto &Succ : MBB->successors()) { + if (MBB->isLayoutSuccessor(Succ)) { + Successor = Succ; + break; + } + } + } else + Successor = NewSuccessor; + + // The FalseBlock and TrueBlock are inserted after the MBB block but before + // its successor. + // Note this need to be done *after* the above setting the Successor code. + if (IsFalseBlockRequired) { + FalseBlock = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, FalseBlock); + } + + if (IsTrueBlockRequired) { + TrueBlock = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, TrueBlock); + } + + if (NewSuccessor) { + MF->insert(It, NewSuccessor); + + // Transfer the rest of this block into the new successor block. + NewSuccessor->splice(NewSuccessor->end(), MBB, + std::next(MachineBasicBlock::iterator(BIL.back())), + MBB->end()); + NewSuccessor->transferSuccessorsAndUpdatePHIs(MBB); + + // Copy the original liveIns of MBB to NewSuccessor. + for (auto &LI : MBB->liveins()) + NewSuccessor->addLiveIn(LI); + + // After splitting the NewSuccessor block, Regs defined but not killed + // in MBB should be treated as liveins of NewSuccessor. + // Note: Cannot use stepBackward instead since we are using the Reg + // liveness state at the end of MBB (liveOut of MBB) as the liveIn for + // NewSuccessor. Otherwise, will cause cyclic dependence. + LivePhysRegs LPR(MF->getSubtarget<PPCSubtarget>().getRegisterInfo()); + SmallVector<std::pair<unsigned, const MachineOperand *>, 2> Clobbers; + for (MachineInstr &MI : *MBB) + LPR.stepForward(MI, Clobbers); + for (auto &LI : LPR) + NewSuccessor->addLiveIn(LI); + } else { + // Remove successor from MBB. + MBB->removeSuccessor(Successor); + } + + // Note that this needs to be done *after* transfering the successors from MBB + // to the NewSuccessor block, otherwise these blocks will also be transferred + // as successors! + MBB->addSuccessor(IsTrueBlockRequired ? TrueBlock : Successor); + MBB->addSuccessor(IsFalseBlockRequired ? FalseBlock : Successor); + + if (IsTrueBlockRequired) { + TrueBlockI = TrueBlock->begin(); + TrueBlock->addSuccessor(Successor); + } + + if (IsFalseBlockRequired) { + FalseBlockI = FalseBlock->begin(); + FalseBlock->addSuccessor(Successor); + } + + // Conditional branch to the TrueBlock or Successor + BuildMI(*MBB, BIL.back(), dl, TII->get(PPC::BC)) + .add(BIL.back()->getOperand(3)) + .addMBB(IsTrueBlockRequired ? TrueBlock : Successor); + + // Jump over the true block to the new successor if the condition is false. + BuildMI(*(IsFalseBlockRequired ? FalseBlock : MBB), + (IsFalseBlockRequired ? FalseBlockI : BIL.back()), dl, + TII->get(PPC::B)) + .addMBB(Successor); + + if (IsFalseBlockRequired) + FalseBlockI = FalseBlock->begin(); // get the position of PPC::B +} + +void PPCExpandISEL::populateBlocks(BlockISELList &BIL) { + for (auto &MI : BIL) { + assert(isISEL(*MI) && "Expecting an ISEL instruction"); + + MachineOperand &Dest = MI->getOperand(0); // location to store to + MachineOperand &TrueValue = MI->getOperand(1); // Value to store if + // condition is true + MachineOperand &FalseValue = MI->getOperand(2); // Value to store if + // condition is false + MachineOperand &ConditionRegister = MI->getOperand(3); // Condition + + DEBUG(dbgs() << "Dest: " << Dest << "\n"); + DEBUG(dbgs() << "TrueValue: " << TrueValue << "\n"); + DEBUG(dbgs() << "FalseValue: " << FalseValue << "\n"); + DEBUG(dbgs() << "ConditionRegister: " << ConditionRegister << "\n"); + + + // If the Dest Register and True Value Register are not the same one, we + // need the True Block. + bool IsADDIInstRequired = !useSameRegister(Dest, TrueValue); + bool IsORIInstRequired = !useSameRegister(Dest, FalseValue); + + if (IsADDIInstRequired) { + // Copy the result into the destination if the condition is true. + BuildMI(*TrueBlock, TrueBlockI, dl, + TII->get(isISEL8(*MI) ? PPC::ADDI8 : PPC::ADDI)) + .add(Dest) + .add(TrueValue) + .add(MachineOperand::CreateImm(0)); + + // Add the LiveIn registers required by true block. + TrueBlock->addLiveIn(TrueValue.getReg()); + } + + if (IsORIInstRequired) { + // Add the LiveIn registers required by false block. + FalseBlock->addLiveIn(FalseValue.getReg()); + } + + if (NewSuccessor) { + // Add the LiveIn registers required by NewSuccessor block. + NewSuccessor->addLiveIn(Dest.getReg()); + NewSuccessor->addLiveIn(TrueValue.getReg()); + NewSuccessor->addLiveIn(FalseValue.getReg()); + NewSuccessor->addLiveIn(ConditionRegister.getReg()); + } + + // Copy the value into the destination if the condition is false. + if (IsORIInstRequired) + BuildMI(*FalseBlock, FalseBlockI, dl, + TII->get(isISEL8(*MI) ? PPC::ORI8 : PPC::ORI)) + .add(Dest) + .add(FalseValue) + .add(MachineOperand::CreateImm(0)); + + MI->eraseFromParent(); // Remove the ISEL instruction. + + NumExpanded++; + } +} + +void PPCExpandISEL::expandMergeableISELs(BlockISELList &BIL) { + // At this stage all the ISELs of BIL are in the same MBB. + MachineBasicBlock *MBB = BIL.back()->getParent(); + + handleSpecialCases(BIL, MBB); + reorganizeBlockLayout(BIL, MBB); + populateBlocks(BIL); +} + +INITIALIZE_PASS(PPCExpandISEL, DEBUG_TYPE, "PowerPC Expand ISEL Generation", + false, false) +char PPCExpandISEL::ID = 0; + +FunctionPass *llvm::createPPCExpandISELPass() { return new PPCExpandISEL(); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index e786ef9aee0e..4c9430a2eca0 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -433,8 +433,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1; - const PPCRegisterInfo *RegInfo = - static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); + const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); // If we are a leaf function, and use up to 224 bytes of stack space, // don't have a frame pointer, calls, or dynamic alloca then we do not need @@ -519,8 +518,7 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { unsigned FPReg = is31 ? PPC::R31 : PPC::R1; unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; - const PPCRegisterInfo *RegInfo = - static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); + const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); bool HasBP = RegInfo->hasBasePointer(MF); unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg; @@ -616,8 +614,7 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, return true; // Get the list of callee-saved registers for the target. - const PPCRegisterInfo *RegInfo = - static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); + const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); // Get all the available registers in the block. @@ -663,8 +660,7 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, // and the stack frame is large, we need two scratch registers. bool PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { - const PPCRegisterInfo *RegInfo = - static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); + const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); MachineFunction &MF = *(MBB->getParent()); bool HasBP = RegInfo->hasBasePointer(MF); unsigned FrameSize = determineFrameLayout(MF, false); @@ -694,10 +690,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo &MFI = MF.getFrameInfo(); - const PPCInstrInfo &TII = - *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo()); - const PPCRegisterInfo *RegInfo = - static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); + const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); + const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); @@ -1221,10 +1215,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, if (MBBI != MBB.end()) dl = MBBI->getDebugLoc(); - const PPCInstrInfo &TII = - *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo()); - const PPCRegisterInfo *RegInfo = - static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); + const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); + const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); // Get alignment info so we know how to restore the SP. const MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -1550,8 +1542,7 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { if (MBBI != MBB.end()) dl = MBBI->getDebugLoc(); - const PPCInstrInfo &TII = - *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo()); + const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); // Create branch instruction for pseudo tail call return instruction unsigned RetOpcode = MBBI->getOpcode(); @@ -1589,8 +1580,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, RegScavenger *RS) const { TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); - const PPCRegisterInfo *RegInfo = - static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); + const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); // Save and clear the LR state. PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); @@ -1793,8 +1783,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); } - const PPCRegisterInfo *RegInfo = - static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); + const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); if (RegInfo->hasBasePointer(MF)) { HasGPSaveArea = true; @@ -1941,8 +1930,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, return false; MachineFunction *MF = MBB.getParent(); - const PPCInstrInfo &TII = - *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo()); + const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc DL; bool CRSpilled = false; MachineInstrBuilder CRMIB; @@ -2083,8 +2071,7 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return false; MachineFunction *MF = MBB.getParent(); - const PPCInstrInfo &TII = - *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo()); + const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); bool CR2Spilled = false; bool CR3Spilled = false; bool CR4Spilled = false; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 1e51c1f651c9..9c72638023bb 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -12,30 +12,57 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" +#include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" +#include "PPCSubtarget.h" #include "PPCTargetMachine.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/IR/Constants.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Module.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <limits> +#include <memory> +#include <new> +#include <tuple> +#include <utility> + using namespace llvm; #define DEBUG_TYPE "ppc-codegen" @@ -60,6 +87,7 @@ static cl::opt<bool> EnableBranchHint( cl::Hidden); namespace { + //===--------------------------------------------------------------------===// /// PPCDAGToDAGISel - PPC specific code to select PPC machine /// instructions for SelectionDAG operations. @@ -69,6 +97,7 @@ namespace { const PPCSubtarget *PPCSubTarget; const PPCTargetLowering *PPCLowering; unsigned GlobalBaseReg; + public: explicit PPCDAGToDAGISel(PPCTargetMachine &tm) : SelectionDAGISel(tm), TM(tm) {} @@ -184,7 +213,6 @@ namespace { bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) override { - switch(ConstraintID) { default: errs() << "ConstraintID: " << ConstraintID << "\n"; @@ -237,7 +265,8 @@ private: void transferMemOperands(SDNode *N, SDNode *Result); }; -} + +} // end anonymous namespace /// InsertVRSaveCode - Once the entire function has been instruction selected, /// all virtual registers are created and all machine instructions are built, @@ -303,7 +332,6 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { } } - /// getGlobalBaseReg - Output the instructions required to put the /// base address to use for accessing globals into a register. /// @@ -368,7 +396,6 @@ static bool isIntS16Immediate(SDValue Op, short &Imm) { return isIntS16Immediate(Op.getNode(), Imm); } - /// isInt32Immediate - This method tests to see if the node is a 32-bit constant /// operand. If so Imm will receive the 32-bit value. static bool isInt32Immediate(SDNode *N, unsigned &Imm) { @@ -833,6 +860,7 @@ static SDNode *getInt64(SelectionDAG *CurDAG, SDNode *N) { } namespace { + class BitPermutationSelector { struct ValueBit { SDValue V; @@ -898,14 +926,12 @@ class BitPermutationSelector { // associated with each) used to choose the lowering method. struct ValueRotInfo { SDValue V; - unsigned RLAmt; - unsigned NumGroups; - unsigned FirstGroupStartIdx; - bool Repl32; + unsigned RLAmt = std::numeric_limits<unsigned>::max(); + unsigned NumGroups = 0; + unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max(); + bool Repl32 = false; - ValueRotInfo() - : RLAmt(UINT32_MAX), NumGroups(0), FirstGroupStartIdx(UINT32_MAX), - Repl32(false) {} + ValueRotInfo() = default; // For sorting (in reverse order) by NumGroups, and then by // FirstGroupStartIdx. @@ -1985,7 +2011,8 @@ public: return RNLM; } }; -} // anonymous namespace + +} // end anonymous namespace bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { if (N->getValueType(0) != MVT::i32 && @@ -2450,7 +2477,6 @@ void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); } - // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. void PPCDAGToDAGISel::Select(SDNode *N) { @@ -2474,19 +2500,18 @@ void PPCDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; - case ISD::Constant: { + case ISD::Constant: if (N->getValueType(0) == MVT::i64) { ReplaceNode(N, getInt64(CurDAG, N)); return; } break; - } - case ISD::SETCC: { + case ISD::SETCC: if (trySETCC(N)) return; break; - } + case PPCISD::GlobalBaseReg: ReplaceNode(N, getGlobalBaseReg()); return; @@ -2502,11 +2527,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) { return; } - case PPCISD::READ_TIME_BASE: { + case PPCISD::READ_TIME_BASE: ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32, MVT::Other, N->getOperand(0))); return; - } case PPCISD::SRA_ADDZE: { SDValue N0 = N->getOperand(0); @@ -2690,6 +2714,19 @@ void PPCDAGToDAGISel::Select(SDNode *N) { CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); return; } + // If this is a negated 64-bit zero-extension mask, + // i.e. the immediate is a sequence of ones from most significant side + // and all zero for reminder, we should use rldicr. + if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && + isMask_64(~Imm64)) { + SDValue Val = N->getOperand(0); + MB = 63 - countTrailingOnes(~Imm64); + SH = 0; + SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; + CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); + return; + } + // AND X, 0 -> 0, not "rlwinm 32". if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) { ReplaceUses(SDValue(N, 0), N->getOperand(1)); @@ -2911,8 +2948,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) { CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops); return; } - break; + case ISD::VECTOR_SHUFFLE: if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || N->getValueType(0) == MVT::v2i64)) { @@ -2940,7 +2977,11 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { SDValue Chain = LD->getChain(); SDValue Ops[] = { Base, Offset, Chain }; - CurDAG->SelectNodeTo(N, PPC::LXVDSX, N->getValueType(0), Ops); + SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, + N->getValueType(0), Ops); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1); return; } } @@ -3088,7 +3129,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDValue(Tmp, 0), GA)); return; } - case PPCISD::PPC32_PICGOT: { + case PPCISD::PPC32_PICGOT: // Generate a PIC-safe GOT reference. assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() && "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); @@ -3096,7 +3137,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::i32); return; - } + case PPCISD::VADD_SPLAT: { // This expands into one of three sequences, depending on whether // the first operand is odd or even, positive or negative. @@ -3139,7 +3180,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDValue TmpVal = SDValue(Tmp, 0); ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal)); return; - } else if (Elt > 0) { // Elt is odd and positive, in the range [17,31]. // @@ -3154,7 +3194,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) { ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), SDValue(Tmp2, 0))); return; - } else { // Elt is odd and negative, in the range [-31,-17]. // @@ -3199,7 +3238,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { EVT VT = N->getValueType(0); SDValue RHS, LHS; - bool BytesFound[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + bool BytesFound[8] = {false, false, false, false, false, false, false, false}; uint64_t Mask = 0, Alt = 0; auto IsByteSelectCC = [this](SDValue O, unsigned &b, @@ -3499,7 +3538,6 @@ void PPCDAGToDAGISel::PreprocessISelDAG() { /// PostprocessISelDAG - Perform some late peephole optimizations /// on the DAG representation. void PPCDAGToDAGISel::PostprocessISelDAG() { - // Skip peepholes at -O0. if (TM.getOptLevel() == CodeGenOpt::None) return; @@ -3515,10 +3553,6 @@ void PPCDAGToDAGISel::PostprocessISelDAG() { // be folded with the isel so that we don't need to materialize a register // containing zero. bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { - // If we're not using isel, then this does not matter. - if (!PPCSubTarget->hasISEL()) - return false; - for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI) { SDNode *User = *UI; @@ -4520,7 +4554,6 @@ void PPCDAGToDAGISel::PeepholePPC64() { } } - /// createPPCISelDag - This pass converts a legalized DAG into a /// PowerPC-specific DAG, ready for instruction scheduling. /// diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 2b9195b095e1..f7663d8e5185 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -11,39 +11,88 @@ // //===----------------------------------------------------------------------===// -#include "PPCISelLowering.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCCallingConv.h" #include "PPCCCState.h" +#include "PPCFrameLowering.h" +#include "PPCInstrInfo.h" +#include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" +#include "PPCRegisterInfo.h" +#include "PPCSubtarget.h" #include "PPCTargetMachine.h" -#include "PPCTargetObjectFile.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> #include <list> +#include <utility> +#include <vector> using namespace llvm; @@ -1525,7 +1574,6 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE) { - // Check that the mask is shuffling words for (unsigned i = 0; i < 4; ++i) { unsigned B0 = N->getMaskElt(i*4); @@ -1643,7 +1691,6 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { // If the element isn't a constant, bail fully out. if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); - if (!UniquedVals[i&(Multiple-1)].getNode()) UniquedVals[i&(Multiple-1)] = N->getOperand(i); else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) @@ -2026,7 +2073,6 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, } if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { - // Common code will reject creating a pre-inc form if the base pointer // is a frame index, or if N is a store and the base pointer is either // the same as or a predecessor of the value being stored. Check for @@ -2277,7 +2323,6 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { - // FIXME: TLS addresses currently use medium model code sequences, // which is the most useful form. Eventually support for small and // large models could be added if users need it, at the cost of @@ -2602,10 +2647,9 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("__trampoline_setup", PtrVT), - std::move(Args)); + CLI.setDebugLoc(dl).setChain(Chain).setLibCallee( + CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args)); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.second; @@ -2737,7 +2781,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, return false; } -bool +bool llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, @@ -2752,7 +2796,7 @@ llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT, unsigned RegNum = State.getFirstUnallocated(ArgRegs); int RegsLeft = NumArgRegs - RegNum; - // Skip if there is not enough registers left for long double type (4 gpr regs + // Skip if there is not enough registers left for long double type (4 gpr regs // in soft float mode) and put long double argument on the stack. if (RegNum != NumArgRegs && RegsLeft < 4) { for (int i = 0; i < RegsLeft; i++) { @@ -4066,7 +4110,7 @@ needStackSlotPassParameters(const PPCSubtarget &Subtarget, static bool hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) { - if (CS->arg_size() != CallerFn->getArgumentList().size()) + if (CS->arg_size() != CallerFn->arg_size()) return false; ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin(); @@ -4222,11 +4266,12 @@ namespace { struct TailCallArgumentInfo { SDValue Arg; SDValue FrameIdxOp; - int FrameIdx; + int FrameIdx = 0; - TailCallArgumentInfo() : FrameIdx(0) {} + TailCallArgumentInfo() = default; }; -} + +} // end anonymous namespace /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. static void StoreTailCallArgumentsToStackSlot( @@ -4406,7 +4451,6 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, ImmutableCallSite *CS, const PPCSubtarget &Subtarget) { - bool isPPC64 = Subtarget.isPPC64(); bool isSVR4ABI = Subtarget.isSVR4ABI(); bool isELFv2ABI = Subtarget.isELFv2ABI(); @@ -4602,7 +4646,6 @@ SDValue PPCTargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - SmallVector<CCValAssign, 16> RVLocs; CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); @@ -4649,7 +4692,6 @@ SDValue PPCTargetLowering::FinishCall( SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const { - std::vector<EVT> NodeTys; SmallVector<SDValue, 8> Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, @@ -5059,7 +5101,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const { - bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); unsigned NumOps = Outs.size(); @@ -5105,10 +5146,30 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( }; const unsigned NumGPRs = array_lengthof(GPR); - const unsigned NumFPRs = 13; + const unsigned NumFPRs = useSoftFloat() ? 0 : 13; const unsigned NumVRs = array_lengthof(VR); const unsigned NumQFPRs = NumFPRs; + // On ELFv2, we can avoid allocating the parameter area if all the arguments + // can be passed to the callee in registers. + // For the fast calling convention, there is another check below. + // Note: We should keep consistent with LowerFormalArguments_64SVR4() + bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast; + if (!HasParameterArea) { + unsigned ParamAreaSize = NumGPRs * PtrByteSize; + unsigned AvailableFPRs = NumFPRs; + unsigned AvailableVRs = NumVRs; + unsigned NumBytesTmp = NumBytes; + for (unsigned i = 0; i != NumOps; ++i) { + if (Outs[i].Flags.isNest()) continue; + if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags, + PtrByteSize, LinkageSize, ParamAreaSize, + NumBytesTmp, AvailableFPRs, AvailableVRs, + Subtarget.hasQPX())) + HasParameterArea = true; + } + } + // When using the fast calling convention, we don't provide backing for // arguments that will be in registers. unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0; @@ -5176,13 +5237,18 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( unsigned NumBytesActuallyUsed = NumBytes; - // The prolog code of the callee may store up to 8 GPR argument registers to + // In the old ELFv1 ABI, + // the prolog code of the callee may store up to 8 GPR argument registers to // the stack, allowing va_start to index over them in memory if its varargs. // Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. - // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. - NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); + // In the ELFv2 ABI, we allocate the parameter area iff a callee + // really requires memory operands, e.g. a vararg function. + if (HasParameterArea) + NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); + else + NumBytes = LinkageSize; // Tail call needs the stack to be aligned. if (getTargetMachine().Options.GuaranteedTailCallOpt && @@ -5401,6 +5467,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( if (CallConv == CallingConv::Fast) ComputePtrOff(); + assert(HasParameterArea && + "Parameter area must exist to pass an argument in memory."); LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, false, MemOpChains, TailCallArguments, dl); @@ -5486,6 +5554,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); } + assert(HasParameterArea && + "Parameter area must exist to pass an argument in memory."); LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, false, MemOpChains, TailCallArguments, dl); @@ -5520,6 +5590,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( // GPRs when within range. For now, we always put the value in both // locations (or even all three). if (isVarArg) { + assert(HasParameterArea && + "Parameter area must exist if we have a varargs call."); // We could elide this store in the case where the object fits // entirely in R registers. Maybe later. SDValue Store = @@ -5552,6 +5624,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( if (CallConv == CallingConv::Fast) ComputePtrOff(); + assert(HasParameterArea && + "Parameter area must exist to pass an argument in memory."); LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, true, MemOpChains, TailCallArguments, dl); @@ -5572,6 +5646,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( case MVT::v4i1: { bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32; if (isVarArg) { + assert(HasParameterArea && + "Parameter area must exist if we have a varargs call."); // We could elide this store in the case where the object fits // entirely in R registers. Maybe later. SDValue Store = @@ -5604,6 +5680,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( if (CallConv == CallingConv::Fast) ComputePtrOff(); + assert(HasParameterArea && + "Parameter area must exist to pass an argument in memory."); LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, true, MemOpChains, TailCallArguments, dl); @@ -5618,7 +5696,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( } } - assert(NumBytesActuallyUsed == ArgOffset); + assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) && + "mismatch in size of parameter area"); (void)NumBytesActuallyUsed; if (!MemOpChains.empty()) @@ -5673,7 +5752,6 @@ SDValue PPCTargetLowering::LowerCall_Darwin( const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const { - unsigned NumOps = Outs.size(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -6065,7 +6143,6 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, SelectionDAG &DAG) const { - SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); @@ -7612,7 +7689,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap); } - } if (Subtarget.hasQPX()) { @@ -7792,24 +7868,39 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget) { unsigned IntrinsicID = - cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); + cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); CompareOpc = -1; isDot = false; switch (IntrinsicID) { - default: return false; - // Comparison predicates. - case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; + default: + return false; + // Comparison predicates. + case Intrinsic::ppc_altivec_vcmpbfp_p: + CompareOpc = 966; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpeqfp_p: + CompareOpc = 198; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpequb_p: + CompareOpc = 6; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpequh_p: + CompareOpc = 70; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpequw_p: + CompareOpc = 134; + isDot = true; + break; case Intrinsic::ppc_altivec_vcmpequd_p: if (Subtarget.hasP8Altivec()) { CompareOpc = 199; - isDot = 1; + isDot = true; } else return false; - break; case Intrinsic::ppc_altivec_vcmpneb_p: case Intrinsic::ppc_altivec_vcmpneh_p: @@ -7818,45 +7909,80 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpnezh_p: case Intrinsic::ppc_altivec_vcmpnezw_p: if (Subtarget.hasP9Altivec()) { - switch(IntrinsicID) { - default: llvm_unreachable("Unknown comparison intrinsic."); - case Intrinsic::ppc_altivec_vcmpneb_p: CompareOpc = 7; break; - case Intrinsic::ppc_altivec_vcmpneh_p: CompareOpc = 71; break; - case Intrinsic::ppc_altivec_vcmpnew_p: CompareOpc = 135; break; - case Intrinsic::ppc_altivec_vcmpnezb_p: CompareOpc = 263; break; - case Intrinsic::ppc_altivec_vcmpnezh_p: CompareOpc = 327; break; - case Intrinsic::ppc_altivec_vcmpnezw_p: CompareOpc = 391; break; + switch (IntrinsicID) { + default: + llvm_unreachable("Unknown comparison intrinsic."); + case Intrinsic::ppc_altivec_vcmpneb_p: + CompareOpc = 7; + break; + case Intrinsic::ppc_altivec_vcmpneh_p: + CompareOpc = 71; + break; + case Intrinsic::ppc_altivec_vcmpnew_p: + CompareOpc = 135; + break; + case Intrinsic::ppc_altivec_vcmpnezb_p: + CompareOpc = 263; + break; + case Intrinsic::ppc_altivec_vcmpnezh_p: + CompareOpc = 327; + break; + case Intrinsic::ppc_altivec_vcmpnezw_p: + CompareOpc = 391; + break; } - isDot = 1; + isDot = true; } else return false; - break; - case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgefp_p: + CompareOpc = 454; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpgtfp_p: + CompareOpc = 710; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpgtsb_p: + CompareOpc = 774; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpgtsh_p: + CompareOpc = 838; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpgtsw_p: + CompareOpc = 902; + isDot = true; + break; case Intrinsic::ppc_altivec_vcmpgtsd_p: if (Subtarget.hasP8Altivec()) { CompareOpc = 967; - isDot = 1; + isDot = true; } else return false; - break; - case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgtub_p: + CompareOpc = 518; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpgtuh_p: + CompareOpc = 582; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpgtuw_p: + CompareOpc = 646; + isDot = true; + break; case Intrinsic::ppc_altivec_vcmpgtud_p: if (Subtarget.hasP8Altivec()) { CompareOpc = 711; - isDot = 1; + isDot = true; } else return false; - break; - // VSX predicate comparisons use the same infrastructure + + // VSX predicate comparisons use the same infrastructure case Intrinsic::ppc_vsx_xvcmpeqdp_p: case Intrinsic::ppc_vsx_xvcmpgedp_p: case Intrinsic::ppc_vsx_xvcmpgtdp_p: @@ -7865,33 +7991,51 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_vsx_xvcmpgtsp_p: if (Subtarget.hasVSX()) { switch (IntrinsicID) { - case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break; - case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break; - case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break; - case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break; - case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break; - case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break; + case Intrinsic::ppc_vsx_xvcmpeqdp_p: + CompareOpc = 99; + break; + case Intrinsic::ppc_vsx_xvcmpgedp_p: + CompareOpc = 115; + break; + case Intrinsic::ppc_vsx_xvcmpgtdp_p: + CompareOpc = 107; + break; + case Intrinsic::ppc_vsx_xvcmpeqsp_p: + CompareOpc = 67; + break; + case Intrinsic::ppc_vsx_xvcmpgesp_p: + CompareOpc = 83; + break; + case Intrinsic::ppc_vsx_xvcmpgtsp_p: + CompareOpc = 75; + break; } - isDot = 1; - } - else + isDot = true; + } else return false; - break; - // Normal Comparisons. - case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; + // Normal Comparisons. + case Intrinsic::ppc_altivec_vcmpbfp: + CompareOpc = 966; + break; + case Intrinsic::ppc_altivec_vcmpeqfp: + CompareOpc = 198; + break; + case Intrinsic::ppc_altivec_vcmpequb: + CompareOpc = 6; + break; + case Intrinsic::ppc_altivec_vcmpequh: + CompareOpc = 70; + break; + case Intrinsic::ppc_altivec_vcmpequw: + CompareOpc = 134; + break; case Intrinsic::ppc_altivec_vcmpequd: - if (Subtarget.hasP8Altivec()) { + if (Subtarget.hasP8Altivec()) CompareOpc = 199; - isDot = 0; - } else + else return false; - break; case Intrinsic::ppc_altivec_vcmpneb: case Intrinsic::ppc_altivec_vcmpneh: @@ -7899,43 +8043,67 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpnezb: case Intrinsic::ppc_altivec_vcmpnezh: case Intrinsic::ppc_altivec_vcmpnezw: - if (Subtarget.hasP9Altivec()) { + if (Subtarget.hasP9Altivec()) switch (IntrinsicID) { - default: llvm_unreachable("Unknown comparison intrinsic."); - case Intrinsic::ppc_altivec_vcmpneb: CompareOpc = 7; break; - case Intrinsic::ppc_altivec_vcmpneh: CompareOpc = 71; break; - case Intrinsic::ppc_altivec_vcmpnew: CompareOpc = 135; break; - case Intrinsic::ppc_altivec_vcmpnezb: CompareOpc = 263; break; - case Intrinsic::ppc_altivec_vcmpnezh: CompareOpc = 327; break; - case Intrinsic::ppc_altivec_vcmpnezw: CompareOpc = 391; break; + default: + llvm_unreachable("Unknown comparison intrinsic."); + case Intrinsic::ppc_altivec_vcmpneb: + CompareOpc = 7; + break; + case Intrinsic::ppc_altivec_vcmpneh: + CompareOpc = 71; + break; + case Intrinsic::ppc_altivec_vcmpnew: + CompareOpc = 135; + break; + case Intrinsic::ppc_altivec_vcmpnezb: + CompareOpc = 263; + break; + case Intrinsic::ppc_altivec_vcmpnezh: + CompareOpc = 327; + break; + case Intrinsic::ppc_altivec_vcmpnezw: + CompareOpc = 391; + break; } - isDot = 0; - } else + else return false; break; - case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgefp: + CompareOpc = 454; + break; + case Intrinsic::ppc_altivec_vcmpgtfp: + CompareOpc = 710; + break; + case Intrinsic::ppc_altivec_vcmpgtsb: + CompareOpc = 774; + break; + case Intrinsic::ppc_altivec_vcmpgtsh: + CompareOpc = 838; + break; + case Intrinsic::ppc_altivec_vcmpgtsw: + CompareOpc = 902; + break; case Intrinsic::ppc_altivec_vcmpgtsd: - if (Subtarget.hasP8Altivec()) { + if (Subtarget.hasP8Altivec()) CompareOpc = 967; - isDot = 0; - } else + else return false; - break; - case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgtub: + CompareOpc = 518; + break; + case Intrinsic::ppc_altivec_vcmpgtuh: + CompareOpc = 582; + break; + case Intrinsic::ppc_altivec_vcmpgtuw: + CompareOpc = 646; + break; case Intrinsic::ppc_altivec_vcmpgtud: - if (Subtarget.hasP8Altivec()) { + if (Subtarget.hasP8Altivec()) CompareOpc = 711; - isDot = 0; - } else + else return false; - break; } return true; @@ -8044,7 +8212,7 @@ SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, } SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { SDLoc dl(Op); // Create a stack slot that is 16-byte aligned. MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); @@ -9174,10 +9342,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineFunction *F = BB->getParent(); - if (Subtarget.hasISEL() && - (MI.getOpcode() == PPC::SELECT_CC_I4 || + if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8 || - MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) { + MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) { SmallVector<MachineOperand, 2> Cond; if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8) @@ -9417,7 +9584,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, BB = EmitAtomicBinary(MI, BB, 4, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64) BB = EmitAtomicBinary(MI, BB, 8, 0); - else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || (Subtarget.hasPartwordAtomics() && @@ -10028,14 +10194,12 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { return false; } - /// This function is called when we have proved that a SETCC node can be replaced /// by subtraction (and other supporting instructions) so that the result of /// comparison is kept in a GPR instead of CR. This function is purely for /// codegen purposes and has some flags to guide the codegen process. static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG) { - assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); // Zero extend the operands to the largest legal integer. Originally, they @@ -10068,7 +10232,6 @@ static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const { - assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); SelectionDAG &DAG = DCI.DAG; @@ -11227,9 +11390,20 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (BSwapOp.getValueType() == MVT::i16) BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); + // If the type of BSWAP operand is wider than stored memory width + // it need to be shifted to the right side before STBRX. + EVT mVT = cast<StoreSDNode>(N)->getMemoryVT(); + if (Op1VT.bitsGT(mVT)) { + int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits(); + BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp, + DAG.getConstant(Shift, dl, MVT::i32)); + // Need to truncate if this is a bswap of i64 stored as i32/i16. + if (Op1VT == MVT::i64) + BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp); + } + SDValue Ops[] = { - N->getOperand(0), BSwapOp, N->getOperand(2), - DAG.getValueType(N->getOperand(1).getValueType()) + N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT) }; return DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), @@ -11570,7 +11744,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } break; - case ISD::INTRINSIC_W_CHAIN: { + case ISD::INTRINSIC_W_CHAIN: // For little endian, VSX loads require generating lxvd2x/xxswapd. // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. if (Subtarget.needsSwapsForVSXMemOps()) { @@ -11583,8 +11757,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } } break; - } - case ISD::INTRINSIC_VOID: { + case ISD::INTRINSIC_VOID: // For little endian, VSX stores require generating xxswapd/stxvd2x. // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. if (Subtarget.needsSwapsForVSXMemOps()) { @@ -11597,7 +11770,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } } break; - } case ISD::BSWAP: // Turn BSWAP (LOAD) -> lhbrx/lwbrx. if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && @@ -11635,9 +11807,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Return N so it doesn't get rechecked! return SDValue(N, 0); } - break; - case PPCISD::VCMP: { + case PPCISD::VCMP: // If a VCMPo node already exists with exactly the same operands as this // node, use its result instead of this node (VCMPo computes both a CR6 and // a normal output). @@ -11687,7 +11858,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(VCMPoNode, 0); } break; - } case ISD::BRCOND: { SDValue Cond = N->getOperand(1); SDValue Target = N->getOperand(2); @@ -11847,6 +12017,7 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, + const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); @@ -12295,7 +12466,6 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { - switch (Intrinsic) { case Intrinsic::ppc_qpx_qvlfd: case Intrinsic::ppc_qpx_qvlfs: @@ -12753,7 +12923,6 @@ void PPCTargetLowering::insertSSPDeclarations(Module &M) const { } bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { - if (!VT.isSimple() || !Subtarget.hasVSX()) return false; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h index 05acd25ae5fc..6113eb58f421 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -17,13 +17,26 @@ #include "PPC.h" #include "PPCInstrInfo.h" -#include "PPCRegisterInfo.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Type.h" #include "llvm/Target/TargetLowering.h" +#include <utility> namespace llvm { + namespace PPCISD { + enum NodeType : unsigned { // Start the numbering where the builtin ops and target ops leave off. FIRST_NUMBER = ISD::BUILTIN_OP_END, @@ -398,10 +411,12 @@ namespace llvm { /// the last operand. TOC_ENTRY }; - } + + } // end namespace PPCISD /// Define some predicates that are used for node matching. namespace PPC { + /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, @@ -465,7 +480,8 @@ namespace llvm { /// If this is a qvaligni shuffle mask, return the shift /// amount, otherwise return -1. int isQVALIGNIShuffleMask(SDNode *N); - } + + } // end namespace PPC class PPCTargetLowering : public TargetLowering { const PPCSubtarget &Subtarget; @@ -492,6 +508,7 @@ namespace llvm { return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } + bool useSoftFloat() const override; MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { @@ -514,6 +531,10 @@ namespace llvm { return true; } + bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { + return VT.isScalarInteger(); + } + bool supportSplitCSR(MachineFunction *MF) const override { return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && @@ -587,6 +608,7 @@ namespace llvm { void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, + const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const override; @@ -694,6 +716,10 @@ namespace llvm { bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override; + bool convertSelectOfConstantsToMath() const override { + return true; + } + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; bool getTgtMemIntrinsic(IntrinsicInfo &Info, @@ -785,15 +811,13 @@ namespace llvm { SDValue Chain; SDValue ResChain; MachinePointerInfo MPI; - bool IsDereferenceable; - bool IsInvariant; - unsigned Alignment; + bool IsDereferenceable = false; + bool IsInvariant = false; + unsigned Alignment = 0; AAMDNodes AAInfo; - const MDNode *Ranges; + const MDNode *Ranges = nullptr; - ReuseLoadInfo() - : IsDereferenceable(false), IsInvariant(false), Alignment(0), - Ranges(nullptr) {} + ReuseLoadInfo() = default; MachineMemOperand::Flags MMOFlags() const { MachineMemOperand::Flags F = MachineMemOperand::MONone; @@ -906,15 +930,13 @@ namespace llvm { const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const override; - SDValue - LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl<SDValue> &InVals) const override; + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const override; - bool - CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, - bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - LLVMContext &Context) const override; + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const override; SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, @@ -994,14 +1016,16 @@ namespace llvm { CCAssignFn *useFastISelCCs(unsigned Flag) const; SDValue - combineElementTruncationToVectorTruncation(SDNode *N, - DAGCombinerInfo &DCI) const; + combineElementTruncationToVectorTruncation(SDNode *N, + DAGCombinerInfo &DCI) const; }; namespace PPC { + FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo); - } + + } // end namespace PPC bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, @@ -1026,6 +1050,7 @@ namespace llvm { CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); -} -#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H +} // end namespace llvm + +#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index fbec8787ef8d..997b96ca6ec8 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -253,11 +253,11 @@ def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$rD), (ins g8rc:$rA, u5imm:$FC), Requires<[IsISA3_0]>; } -let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in +let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst), "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isDOT; -let mayStore = 1, hasSideEffects = 0 in +let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC), "stdat $rS, $rA, $FC", IIC_LdStStore>, isPPC64, Requires<[IsISA3_0]>; @@ -1082,7 +1082,7 @@ def STDBRX: XForm_8<31, 660, (outs), (ins g8rc:$rS, memrr:$dst), } // Stores with Update (pre-inc). -let PPC970_Unit = 2, mayStore = 1 in { +let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in { def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), "stbu $rS, $dst", IIC_LdStStoreUpd, []>, @@ -1232,6 +1232,10 @@ def : Pat<(srl i64:$rS, i32:$rB), def : Pat<(shl i64:$rS, i32:$rB), (SLD $rS, $rB)>; +// SUBFIC +def : Pat<(sub imm64SExt16:$imm, i64:$in), + (SUBFIC8 $in, imm:$imm)>; + // SHL/SRL def : Pat<(shl i64:$in, (i32 imm:$imm)), (RLDICR $in, imm:$imm, (SHL64 imm:$imm))>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index 5c022749ad64..c380766e9f5c 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -407,7 +407,7 @@ def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB), "mtvscr $vB", IIC_LdStLoad, [(int_ppc_altivec_mtvscr v4i32:$vB)]>; -let PPC970_Unit = 2 in { // Loads. +let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads. def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src), "lvebx $vD, $src", IIC_LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; @@ -434,7 +434,7 @@ def LVSR : XForm_1<31, 38, (outs vrrc:$vD), (ins memrr:$src), [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, PPC970_Unit_LSU; -let PPC970_Unit = 2 in { // Stores. +let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { // Stores. def STVEBX: XForm_8<31, 135, (outs), (ins vrrc:$rS, memrr:$dst), "stvebx $rS, $dst", IIC_LdStStore, [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>; @@ -851,6 +851,10 @@ def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins), // Additional Altivec Patterns // +// Extended mnemonics +def : InstAlias<"vmr $vD, $vA", (VOR vrrc:$vD, vrrc:$vA, vrrc:$vA)>; +def : InstAlias<"vnot $vD, $vA", (VNOR vrrc:$vD, vrrc:$vA, vrrc:$vA)>; + // Loads. def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 2e0b9355f82b..8e159f47ea2e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -65,7 +65,9 @@ UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, void PPCInstrInfo::anchor() {} PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI) - : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP), + : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP, + /* CatchRetOpcode */ -1, + STI.isPPC64() ? PPC::BLR8 : PPC::BLR), Subtarget(STI), RI(STI.getTargetMachine()) {} /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for @@ -662,12 +664,14 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB, (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB); else if (Cond[0].getImm() == PPC::PRED_BIT_SET) - BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB); + BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB); else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET) - BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB); + BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB); else // Conditional branch BuildMI(&MBB, DL, get(PPC::BCC)) - .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB); + .addImm(Cond[0].getImm()) + .add(Cond[1]) + .addMBB(TBB); return 1; } @@ -677,12 +681,14 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB, (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB); else if (Cond[0].getImm() == PPC::PRED_BIT_SET) - BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB); + BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB); else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET) - BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB); + BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB); else BuildMI(&MBB, DL, get(PPC::BCC)) - .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB); + .addImm(Cond[0].getImm()) + .add(Cond[1]) + .addMBB(TBB); BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB); return 2; } @@ -692,9 +698,6 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond, unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const { - if (!Subtarget.hasISEL()) - return false; - if (Cond.size() != 2) return false; @@ -736,9 +739,6 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, assert(Cond.size() == 2 && "PPC branch conditions have two components!"); - assert(Subtarget.hasISEL() && - "Cannot insert select on target without ISEL support"); - // Get the register classes. MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *RC = @@ -1493,7 +1493,7 @@ bool PPCInstrInfo::DefinesPredicate(MachineInstr &MI, return Found; } -bool PPCInstrInfo::isPredicable(MachineInstr &MI) const { +bool PPCInstrInfo::isPredicable(const MachineInstr &MI) const { unsigned OpC = MI.getOpcode(); switch (OpC) { default: @@ -1836,8 +1836,7 @@ unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { PatchPointOpers Opers(&MI); return Opers.getNumPatchBytes(); } else { - const MCInstrDesc &Desc = get(Opcode); - return Desc.getSize(); + return get(Opcode).getSize(); } } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 32b2f009a3f5..f11aed8fa268 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -253,7 +253,7 @@ public: bool DefinesPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred) const override; - bool isPredicable(MachineInstr &MI) const override; + bool isPredicable(const MachineInstr &MI) const override; // Comparison optimization. diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td index f615cc7cc974..f004ce49cac0 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -114,9 +114,9 @@ def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>; def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, [SDNPHasChain, SDNPMayStore]>; def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx, - [SDNPHasChain, SDNPMayLoad]>; + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx, - [SDNPHasChain, SDNPMayLoad]>; + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx, [SDNPHasChain, SDNPMayLoad]>; def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix, @@ -243,7 +243,7 @@ def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr, [SDNPHasChain, SDNPOptInGlue]>; def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, - [SDNPHasChain, SDNPMayLoad]>; + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, [SDNPHasChain, SDNPMayStore]>; @@ -770,9 +770,10 @@ def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned. } // A single-register address. This is used with the SjLj -// pseudo-instructions. +// pseudo-instructions which tranlates to LD/LWZ. These instructions requires +// G8RC_NOX0 registers. def memr : Operand<iPTR> { - let MIOperandInfo = (ops ptr_rc:$ptrreg); + let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg); } def PPCTLSRegOperand : AsmOperandClass { let Name = "TLSReg"; let PredicateMethod = "isTLSReg"; @@ -1648,7 +1649,7 @@ let usesCustomInserter = 1 in { } // Instructions to support atomic operations -let mayLoad = 1, hasSideEffects = 0 in { +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in { def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src), "lbarx $rD, $src", IIC_LdStLWARX, []>, Requires<[HasPartwordAtomics]>; @@ -1681,7 +1682,7 @@ def LWAT : X_RD5_RS5_IM5<31, 582, (outs gprc:$rD), (ins gprc:$rA, u5imm:$FC), Requires<[IsISA3_0]>; } -let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in { +let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { def STBCX : XForm_1<31, 694, (outs), (ins gprc:$rS, memrr:$dst), "stbcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT, Requires<[HasPartwordAtomics]>; @@ -1694,7 +1695,7 @@ def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst), "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT; } -let mayStore = 1, hasSideEffects = 0 in +let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in def STWAT : X_RD5_RS5_IM5<31, 710, (outs), (ins gprc:$rS, gprc:$rA, u5imm:$FC), "stwat $rS, $rA, $FC", IIC_LdStStore>, Requires<[IsISA3_0]>; @@ -1740,7 +1741,7 @@ def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src), // Unindexed (r+i) Loads with Update (preinc). -let mayLoad = 1, hasSideEffects = 0 in { +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in { def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, @@ -1813,7 +1814,7 @@ def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), // Indexed (r+r) Loads. // -let PPC970_Unit = 2 in { +let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src), "lbzx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi8 xaddr:$src))]>; @@ -1827,8 +1828,6 @@ def LHZX : XForm_1<31, 279, (outs gprc:$rD), (ins memrr:$src), def LWZX : XForm_1<31, 23, (outs gprc:$rD), (ins memrr:$src), "lwzx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (load xaddr:$src))]>; - - def LHBRX : XForm_1<31, 790, (outs gprc:$rD), (ins memrr:$src), "lhbrx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>; @@ -1860,7 +1859,7 @@ def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src), // // Unindexed (r+i) Stores. -let PPC970_Unit = 2 in { +let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src), "stb $rS, $src", IIC_LdStStore, [(truncstorei8 i32:$rS, iaddr:$src)]>; @@ -1879,7 +1878,7 @@ def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst), } // Unindexed (r+i) Stores with Update (preinc). -let PPC970_Unit = 2, mayStore = 1 in { +let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), "stbu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; @@ -1948,7 +1947,7 @@ def STFDX : XForm_28<31, 727, (outs), (ins f8rc:$frS, memrr:$dst), } // Indexed (r+r) Stores with Update (preinc). -let PPC970_Unit = 2, mayStore = 1 in { +let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), "stbux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 0d9e3459f47e..13603732397a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -62,7 +62,7 @@ def SDTVecConv : SDTypeProfile<1, 2, [ ]>; def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, - [SDNPHasChain, SDNPMayLoad]>; + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x, [SDNPHasChain, SDNPMayStore]>; def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>; @@ -117,7 +117,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects. let Uses = [RM] in { // Load indexed instructions - let mayLoad = 1 in { + let mayLoad = 1, mayStore = 0 in { let CodeSize = 3 in def LXSDX : XX1Form<31, 588, (outs vsfrc:$XT), (ins memrr:$src), @@ -142,7 +142,7 @@ let Uses = [RM] in { } // mayLoad // Store indexed instructions - let mayStore = 1 in { + let mayStore = 1, mayLoad = 0 in { let CodeSize = 3 in def STXSDX : XX1Form<31, 716, (outs), (ins vsfrc:$XT, memrr:$dst), @@ -1197,7 +1197,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>; // VSX scalar loads introduced in ISA 2.07 - let mayLoad = 1 in { + let mayLoad = 1, mayStore = 0 in { let CodeSize = 3 in def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src), "lxsspx $XT, $src", IIC_LdStLFD, @@ -1211,7 +1211,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. } // mayLoad // VSX scalar stores introduced in ISA 2.07 - let mayStore = 1 in { + let mayStore = 1, mayLoad = 0 in { let CodeSize = 3 in def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst), "stxsspx $XT, $dst", IIC_LdStSTFD, @@ -1410,6 +1410,11 @@ let Predicates = [HasDirectMove] in { "mfvsrd $rA, $XT", IIC_VecGeneral, [(set i64:$rA, (PPCmfvsr f64:$XT))]>, Requires<[In64BitMode]>; + let isCodeGenOnly = 1 in + def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vrrc:$XT), + "mfvsrd $rA, $XT", IIC_VecGeneral, + []>, + Requires<[In64BitMode]>; def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT), "mfvsrwz $rA, $XT", IIC_VecGeneral, [(set i32:$rA, (PPCmfvsr f64:$XT))]>; @@ -1440,6 +1445,13 @@ let Predicates = [IsISA3_0, HasDirectMove] in { } // IsISA3_0, HasDirectMove } // UseVSXReg = 1 +// We want to parse this from asm, but we don't want to emit this as it would +// be emitted with a VSX reg. So leave Emit = 0 here. +def : InstAlias<"mfvrd $rA, $XT", + (MFVRD g8rc:$rA, vrrc:$XT), 0>; +def : InstAlias<"mffprd $rA, $src", + (MFVSRD g8rc:$rA, f8rc:$src)>; + /* Direct moves of various widths from GPR's into VSR's. Each move lines the value up into element 0 (both BE and LE). Namely, entities smaller than a doubleword are shifted left and moved for BE. For LE, they're moved, then @@ -2186,7 +2198,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { } // UseVSXReg = 1 // Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a - // seperate pattern so that it can convert the input register class from + // separate pattern so that it can convert the input register class from // VRRC(v8i16) to VSRC. def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)), (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>; @@ -2335,7 +2347,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging. - let mayLoad = 1 in { + let mayLoad = 1, mayStore = 0 in { // Load Vector def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src), "lxv $XT, $src", IIC_LdStLFD, []>, UseVSXReg; @@ -2383,7 +2395,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging. - let mayStore = 1 in { + let mayStore = 1, mayLoad = 0 in { // Store Vector def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst), "stxv $XT, $dst", IIC_LdStSTFD, []>, UseVSXReg; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index 2c3e75523e8f..a349fa1b4090 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -39,6 +39,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" @@ -72,9 +73,10 @@ namespace { public: static char ID; // Pass ID, replacement for typeid - PPCLoopPreIncPrep() : FunctionPass(ID), TM(nullptr) { + PPCLoopPreIncPrep() : FunctionPass(ID) { initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry()); } + PPCLoopPreIncPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry()); } @@ -93,7 +95,7 @@ namespace { bool rotateLoop(Loop *L); private: - PPCTargetMachine *TM; + PPCTargetMachine *TM = nullptr; DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index e527b018d4fb..541b98e01b99 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -148,7 +148,7 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, MCOperand MCOp; switch (MO.getType()) { default: - MI->dump(); + MI->print(errs()); llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: assert(!MO.getSubReg() && "Subregs should be eliminated!"); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index 2413af3f7042..c6d2c3ebcc0f 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -147,9 +147,9 @@ bool PPCMIPeephole::simplifyCode(void) { << "Optimizing load-and-splat/splat " "to load-and-splat/copy: "); DEBUG(MI.dump()); - BuildMI(MBB, &MI, MI.getDebugLoc(), - TII->get(PPC::COPY), MI.getOperand(0).getReg()) - .addOperand(MI.getOperand(1)); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), + MI.getOperand(0).getReg()) + .add(MI.getOperand(1)); ToErase = &MI; Simplified = true; } @@ -169,9 +169,9 @@ bool PPCMIPeephole::simplifyCode(void) { << "Optimizing splat/swap or splat/splat " "to splat/copy: "); DEBUG(MI.dump()); - BuildMI(MBB, &MI, MI.getDebugLoc(), - TII->get(PPC::COPY), MI.getOperand(0).getReg()) - .addOperand(MI.getOperand(1)); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), + MI.getOperand(0).getReg()) + .add(MI.getOperand(1)); ToErase = &MI; Simplified = true; } @@ -194,9 +194,9 @@ bool PPCMIPeephole::simplifyCode(void) { else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) { DEBUG(dbgs() << "Optimizing swap/swap => copy: "); DEBUG(MI.dump()); - BuildMI(MBB, &MI, MI.getDebugLoc(), - TII->get(PPC::COPY), MI.getOperand(0).getReg()) - .addOperand(DefMI->getOperand(1)); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), + MI.getOperand(0).getReg()) + .add(DefMI->getOperand(1)); ToErase = &MI; Simplified = true; } @@ -251,7 +251,7 @@ bool PPCMIPeephole::simplifyCode(void) { DEBUG(MI.dump()); BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), MI.getOperand(0).getReg()) - .addOperand(MI.getOperand(OpNo)); + .add(MI.getOperand(OpNo)); ToErase = &MI; Simplified = true; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp index 9d91e31165de..bc2d9a08b5e8 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp @@ -8,14 +8,13 @@ //===----------------------------------------------------------------------===// #include "PPCMachineFunctionInfo.h" +#include "llvm/ADT/Twine.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCContext.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; -void PPCFunctionInfo::anchor() { } +void PPCFunctionInfo::anchor() {} MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const { const DataLayout &DL = MF.getDataLayout(); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 4c29aa06f048..202e10058b73 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -14,6 +14,7 @@ #ifndef LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" namespace llvm { @@ -26,17 +27,17 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// FramePointerSaveIndex - Frame index of where the old frame pointer is /// stored. Also used as an anchor for instructions that need to be altered /// when using frame pointers (dyna_add, dyna_sub.) - int FramePointerSaveIndex; + int FramePointerSaveIndex = 0; /// ReturnAddrSaveIndex - Frame index of where the return address is stored. /// - int ReturnAddrSaveIndex; + int ReturnAddrSaveIndex = 0; /// Frame index where the old base pointer is stored. - int BasePointerSaveIndex; + int BasePointerSaveIndex = 0; /// Frame index where the old PIC base pointer is stored. - int PICBasePointerSaveIndex; + int PICBasePointerSaveIndex = 0; /// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current /// function. This is only valid after the initial scan of the function by @@ -44,54 +45,58 @@ class PPCFunctionInfo : public MachineFunctionInfo { bool MustSaveLR; /// Does this function have any stack spills. - bool HasSpills; + bool HasSpills = false; /// Does this function spill using instructions with only r+r (not r+i) /// forms. - bool HasNonRISpills; + bool HasNonRISpills = false; /// SpillsCR - Indicates whether CR is spilled in the current function. - bool SpillsCR; + bool SpillsCR = false; /// Indicates whether VRSAVE is spilled in the current function. - bool SpillsVRSAVE; + bool SpillsVRSAVE = false; /// LRStoreRequired - The bool indicates whether there is some explicit use of /// the LR/LR8 stack slot that is not obvious from scanning the code. This /// requires that the code generator produce a store of LR to the stack on /// entry, even though LR may otherwise apparently not be used. - bool LRStoreRequired; + bool LRStoreRequired = false; /// This function makes use of the PPC64 ELF TOC base pointer (register r2). - bool UsesTOCBasePtr; + bool UsesTOCBasePtr = false; /// MinReservedArea - This is the frame size that is at least reserved in a /// potential caller (parameter+linkage area). - unsigned MinReservedArea; + unsigned MinReservedArea = 0; /// TailCallSPDelta - Stack pointer delta used when tail calling. Maximum /// amount the stack pointer is adjusted to make the frame bigger for tail /// calls. Used for creating an area before the register spill area. - int TailCallSPDelta; + int TailCallSPDelta = 0; /// HasFastCall - Does this function contain a fast call. Used to determine /// how the caller's stack pointer should be calculated (epilog/dynamicalloc). - bool HasFastCall; + bool HasFastCall = false; /// VarArgsFrameIndex - FrameIndex for start of varargs area. - int VarArgsFrameIndex; + int VarArgsFrameIndex = 0; + /// VarArgsStackOffset - StackOffset for start of stack /// arguments. - int VarArgsStackOffset; + + int VarArgsStackOffset = 0; + /// VarArgsNumGPR - Index of the first unused integer /// register for parameter passing. - unsigned VarArgsNumGPR; + unsigned VarArgsNumGPR = 0; + /// VarArgsNumFPR - Index of the first unused double /// register for parameter passing. - unsigned VarArgsNumFPR; + unsigned VarArgsNumFPR = 0; /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4. - int CRSpillFrameIndex; + int CRSpillFrameIndex = 0; /// If any of CR[2-4] need to be saved in the prologue and restored in the /// epilogue then they are added to this array. This is used for the @@ -102,35 +107,14 @@ class PPCFunctionInfo : public MachineFunctionInfo { MachineFunction &MF; /// Whether this uses the PIC Base register or not. - bool UsesPICBase; + bool UsesPICBase = false; /// True if this function has a subset of CSRs that is handled explicitly via /// copies - bool IsSplitCSR; + bool IsSplitCSR = false; public: - explicit PPCFunctionInfo(MachineFunction &MF) - : FramePointerSaveIndex(0), - ReturnAddrSaveIndex(0), - BasePointerSaveIndex(0), - PICBasePointerSaveIndex(0), - HasSpills(false), - HasNonRISpills(false), - SpillsCR(false), - SpillsVRSAVE(false), - LRStoreRequired(false), - UsesTOCBasePtr(false), - MinReservedArea(0), - TailCallSPDelta(0), - HasFastCall(false), - VarArgsFrameIndex(0), - VarArgsStackOffset(0), - VarArgsNumGPR(0), - VarArgsNumFPR(0), - CRSpillFrameIndex(0), - MF(MF), - UsesPICBase(0), - IsSplitCSR(false) {} + explicit PPCFunctionInfo(MachineFunction &MF) : MF(MF) {} int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } @@ -211,7 +195,6 @@ public: MCSymbol *getTOCOffsetSymbol() const; }; -} // end of namespace llvm - +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index e49201402861..aad913924692 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -209,86 +209,67 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // The ZERO register is not really a register, but the representation of r0 // when used in instructions that treat r0 as the constant 0. - Reserved.set(PPC::ZERO); - Reserved.set(PPC::ZERO8); + markSuperRegs(Reserved, PPC::ZERO); // The FP register is also not really a register, but is the representation // of the frame pointer register used by ISD::FRAMEADDR. - Reserved.set(PPC::FP); - Reserved.set(PPC::FP8); + markSuperRegs(Reserved, PPC::FP); // The BP register is also not really a register, but is the representation // of the base pointer register used by setjmp. - Reserved.set(PPC::BP); - Reserved.set(PPC::BP8); + markSuperRegs(Reserved, PPC::BP); // The counter registers must be reserved so that counter-based loops can // be correctly formed (and the mtctr instructions are not DCE'd). - Reserved.set(PPC::CTR); - Reserved.set(PPC::CTR8); + markSuperRegs(Reserved, PPC::CTR); + markSuperRegs(Reserved, PPC::CTR8); - Reserved.set(PPC::R1); - Reserved.set(PPC::LR); - Reserved.set(PPC::LR8); - Reserved.set(PPC::RM); + markSuperRegs(Reserved, PPC::R1); + markSuperRegs(Reserved, PPC::LR); + markSuperRegs(Reserved, PPC::LR8); + markSuperRegs(Reserved, PPC::RM); if (!Subtarget.isDarwinABI() || !Subtarget.hasAltivec()) - Reserved.set(PPC::VRSAVE); + markSuperRegs(Reserved, PPC::VRSAVE); // The SVR4 ABI reserves r2 and r13 if (Subtarget.isSVR4ABI()) { - Reserved.set(PPC::R2); // System-reserved register - Reserved.set(PPC::R13); // Small Data Area pointer register + // We only reserve r2 if we need to use the TOC pointer. If we have no + // explicit uses of the TOC pointer (meaning we're a leaf function with + // no constant-pool loads, etc.) and we have no potential uses inside an + // inline asm block, then we can treat r2 has an ordinary callee-saved + // register. + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + if (!TM.isPPC64() || FuncInfo->usesTOCBasePtr() || MF.hasInlineAsm()) + markSuperRegs(Reserved, PPC::R2); // System-reserved register + markSuperRegs(Reserved, PPC::R13); // Small Data Area pointer register } // On PPC64, r13 is the thread pointer. Never allocate this register. - if (TM.isPPC64()) { - Reserved.set(PPC::R13); - - Reserved.set(PPC::X1); - Reserved.set(PPC::X13); - - if (TFI->needsFP(MF)) - Reserved.set(PPC::X31); - - if (hasBasePointer(MF)) - Reserved.set(PPC::X30); - - // The 64-bit SVR4 ABI reserves r2 for the TOC pointer. - if (Subtarget.isSVR4ABI()) { - // We only reserve r2 if we need to use the TOC pointer. If we have no - // explicit uses of the TOC pointer (meaning we're a leaf function with - // no constant-pool loads, etc.) and we have no potential uses inside an - // inline asm block, then we can treat r2 has an ordinary callee-saved - // register. - const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); - if (FuncInfo->usesTOCBasePtr() || MF.hasInlineAsm()) - Reserved.set(PPC::X2); - else - Reserved.reset(PPC::R2); - } - } + if (TM.isPPC64()) + markSuperRegs(Reserved, PPC::R13); if (TFI->needsFP(MF)) - Reserved.set(PPC::R31); + markSuperRegs(Reserved, PPC::R31); bool IsPositionIndependent = TM.isPositionIndependent(); if (hasBasePointer(MF)) { if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent) - Reserved.set(PPC::R29); + markSuperRegs(Reserved, PPC::R29); else - Reserved.set(PPC::R30); + markSuperRegs(Reserved, PPC::R30); } if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent) - Reserved.set(PPC::R30); + markSuperRegs(Reserved, PPC::R30); // Reserve Altivec registers when Altivec is unavailable. if (!Subtarget.hasAltivec()) for (TargetRegisterClass::iterator I = PPC::VRRCRegClass.begin(), IE = PPC::VRRCRegClass.end(); I != IE; ++I) - Reserved.set(*I); + markSuperRegs(Reserved, *I); + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td index 8e52da583a0d..79963dd6a3e9 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td @@ -377,7 +377,7 @@ def P8Itineraries : ProcessorItineraries< InstrStage<1, [P8_FPU1, P8_FPU2]>], [7, 1, 1]>, InstrItinData<IIC_VecPerm , [InstrStage<1, [P8_DU1, P8_DU2], 0>, - InstrStage<1, [P8_FPU2, P8_FPU2]>], + InstrStage<1, [P8_FPU1, P8_FPU2]>], [3, 1, 1]> ]>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index e8a87e7f4437..ccf0f80c336b 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -220,8 +220,8 @@ bool PPCSubtarget::enableSubRegLiveness() const { return UseSubRegLiveness; } -unsigned char PPCSubtarget::classifyGlobalReference( - const GlobalValue *GV) const { +unsigned char +PPCSubtarget::classifyGlobalReference(const GlobalValue *GV) const { // Note that currently we don't generate non-pic references. // If a caller wants that, this will have to be updated. @@ -229,23 +229,9 @@ unsigned char PPCSubtarget::classifyGlobalReference( if (TM.getCodeModel() == CodeModel::Large) return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG; - unsigned char flags = PPCII::MO_PIC_FLAG; - - // Only if the relocation mode is PIC do we have to worry about - // interposition. In all other cases we can use a slightly looser standard to - // decide how to access the symbol. - if (TM.getRelocationModel() == Reloc::PIC_) { - // If it's local, or it's non-default, it can't be interposed. - if (!GV->hasLocalLinkage() && - GV->hasDefaultVisibility()) { - flags |= PPCII::MO_NLP_FLAG; - } - return flags; - } - - if (GV->isStrongDefinitionForLinker()) - return flags; - return flags | PPCII::MO_NLP_FLAG; + if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) + return PPCII::MO_PIC_FLAG; + return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG; } bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h index 7fd907990ceb..5a97f595ad8c 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -298,7 +298,9 @@ public: bool isSVR4ABI() const { return !isDarwinABI(); } bool isELFv2ABI() const; - bool enableEarlyIfConversion() const override { return hasISEL(); } + /// Originally, this function return hasISEL(). Now we always enable it, + /// but may expand the ISEL instruction later. + bool enableEarlyIfConversion() const override { return true; } // Scheduling customization. bool enableMachineScheduler() const override; @@ -316,6 +318,8 @@ public: /// classifyGlobalReference - Classify a global variable reference for the /// current subtarget accourding to how we should reference it. unsigned char classifyGlobalReference(const GlobalValue *GV) const; + + bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; } }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index 91b1d24b2e41..7806d45b5457 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -11,21 +11,33 @@ // //===----------------------------------------------------------------------===// -#include "PPCTargetMachine.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPC.h" +#include "PPCSubtarget.h" #include "PPCTargetObjectFile.h" +#include "PPCTargetMachine.h" #include "PPCTargetTransformInfo.h" -#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/MC/MCStreamer.h" +#include "llvm/Pass.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" +#include <cassert> +#include <memory> +#include <string> + using namespace llvm; static cl:: @@ -80,6 +92,7 @@ extern "C" void LLVMInitializePowerPCTarget() { PassRegistry &PR = *PassRegistry::getPassRegistry(); initializePPCBoolRetToIntPass(PR); + initializePPCExpandISELPass(PR); } /// Return the datalayout string of a subtarget. @@ -149,9 +162,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { // If it isn't a Mach-O file then it's going to be a linux ELF // object file. if (TT.isOSDarwin()) - return make_unique<TargetLoweringObjectFileMachO>(); + return llvm::make_unique<TargetLoweringObjectFileMachO>(); - return make_unique<PPC64LinuxTargetObjectFile>(); + return llvm::make_unique<PPC64LinuxTargetObjectFile>(); } static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, @@ -205,15 +218,13 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, computeFSAdditions(FS, OL, TT), Options, getEffectiveRelocModel(TT, RM), CM, OL), TLOF(createTLOF(getTargetTriple())), - TargetABI(computeTargetABI(TT, Options)), - Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) { - + TargetABI(computeTargetABI(TT, Options)) { initAsmInfo(); } -PPCTargetMachine::~PPCTargetMachine() {} +PPCTargetMachine::~PPCTargetMachine() = default; -void PPC32TargetMachine::anchor() { } +void PPC32TargetMachine::anchor() {} PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -223,7 +234,7 @@ PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT, CodeGenOpt::Level OL) : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} -void PPC64TargetMachine::anchor() { } +void PPC64TargetMachine::anchor() {} PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -281,6 +292,7 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const { //===----------------------------------------------------------------------===// namespace { + /// PPC Code Generator Pass Configuration Options. class PPCPassConfig : public TargetPassConfig { public: @@ -300,7 +312,8 @@ public: void addPreSched2() override; void addPreEmitPass() override; }; -} // namespace + +} // end anonymous namespace TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { return new PPCPassConfig(this, PM); @@ -416,6 +429,8 @@ void PPCPassConfig::addPreSched2() { } void PPCPassConfig::addPreEmitPass() { + addPass(createPPCExpandISELPass()); + if (getOptLevel() != CodeGenOpt::None) addPass(createPPCEarlyReturnPass(), false); // Must run branch selection immediately preceding the asm printer. diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h index 59b4f1e30c0e..f2838351cee5 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -29,7 +29,6 @@ public: private: std::unique_ptr<TargetLoweringObjectFile> TLOF; PPCABI TargetABI; - PPCSubtarget Subtarget; mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h index dbe7617d3542..310fea9ef09f 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h @@ -1,4 +1,4 @@ -//===-- PPCTargetStreamer.h - PPC Target Streamer --s-----------*- C++ -*--===// +//===- PPCTargetStreamer.h - PPC Target Streamer ----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,18 +10,26 @@ #ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H #define LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCStreamer.h" namespace llvm { + +class MCExpr; +class MCSymbol; +class MCSymbolELF; + class PPCTargetStreamer : public MCTargetStreamer { public: PPCTargetStreamer(MCStreamer &S); ~PPCTargetStreamer() override; + virtual void emitTCEntry(const MCSymbol &S) = 0; virtual void emitMachine(StringRef CPU) = 0; virtual void emitAbiVersion(int AbiVersion) = 0; virtual void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) = 0; }; -} -#endif +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index f94d1eab097d..7ee1317bf72f 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -302,14 +302,16 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, return LT.first; } -int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { +int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I) { assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); return BaseT::getCastInstrCost(Opcode, Dst, Src); } -int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); +int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I) { + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { @@ -352,7 +354,7 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { } int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) { + unsigned AddressSpace, const Instruction *I) { // Legalize the type. std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && @@ -401,6 +403,10 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, if (IsVSXType || (ST->hasVSX() && IsAltivecType)) return Cost; + // Newer PPC supports unaligned memory access. + if (TLI->allowsMisalignedMemoryAccesses(LT.second, 0)) + return Cost; + // PPC in general does not support unaligned loads and stores. They'll need // to be decomposed based on the alignment factor. diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 30ee2814aba1..6ce70fbd8778 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -74,11 +74,13 @@ public: TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, ArrayRef<const Value *> Args = ArrayRef<const Value *>()); int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); - int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); - int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I = nullptr); + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); + unsigned AddressSpace, const Instruction *I = nullptr); int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp index 3b5d8f094fd0..f3a0290da054 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp @@ -112,7 +112,7 @@ protected: TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg) .addImm(1) // add 1, not 0, because there is no implicit clearing // of the high bits. - .addOperand(SrcMO) + .add(SrcMO) .addImm(PPC::sub_64); // The source of the original copy is now the new virtual register. @@ -132,7 +132,7 @@ protected: unsigned NewVReg = MRI.createVirtualRegister(DstRC); BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg) - .addOperand(SrcMO); + .add(SrcMO); // Transform the original copy into a subregister extraction copy. SrcMO.setReg(NewVReg); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 8197285b7b1f..d3434b77be8a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -522,7 +522,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { if (RelevantFunction) { DEBUG(dbgs() << "Swap vector when first built\n\n"); - dumpSwapVector(); + DEBUG(dumpSwapVector()); } return RelevantFunction; @@ -731,7 +731,7 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { } DEBUG(dbgs() << "Swap vector after web analysis:\n\n"); - dumpSwapVector(); + DEBUG(dumpSwapVector()); } // Walk the swap vector entries looking for swaps fed by permuting loads @@ -936,9 +936,9 @@ bool PPCVSXSwapRemoval::removeSwaps() { Changed = true; MachineInstr *MI = SwapVector[EntryIdx].VSEMI; MachineBasicBlock *MBB = MI->getParent(); - BuildMI(*MBB, MI, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) - .addOperand(MI->getOperand(1)); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), + MI->getOperand(0).getReg()) + .add(MI->getOperand(1)); DEBUG(dbgs() << format("Replaced %d with copy: ", SwapVector[EntryIdx].VSEId)); @@ -951,77 +951,78 @@ bool PPCVSXSwapRemoval::removeSwaps() { return Changed; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) // For debug purposes, dump the contents of the swap vector. -void PPCVSXSwapRemoval::dumpSwapVector() { +LLVM_DUMP_METHOD void PPCVSXSwapRemoval::dumpSwapVector() { for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; int ID = SwapVector[EntryIdx].VSEId; - DEBUG(dbgs() << format("%6d", ID)); - DEBUG(dbgs() << format("%6d", EC->getLeaderValue(ID))); - DEBUG(dbgs() << format(" BB#%3d", MI->getParent()->getNumber())); - DEBUG(dbgs() << format(" %14s ", - TII->getName(MI->getOpcode()).str().c_str())); + dbgs() << format("%6d", ID); + dbgs() << format("%6d", EC->getLeaderValue(ID)); + dbgs() << format(" BB#%3d", MI->getParent()->getNumber()); + dbgs() << format(" %14s ", TII->getName(MI->getOpcode()).str().c_str()); if (SwapVector[EntryIdx].IsLoad) - DEBUG(dbgs() << "load "); + dbgs() << "load "; if (SwapVector[EntryIdx].IsStore) - DEBUG(dbgs() << "store "); + dbgs() << "store "; if (SwapVector[EntryIdx].IsSwap) - DEBUG(dbgs() << "swap "); + dbgs() << "swap "; if (SwapVector[EntryIdx].MentionsPhysVR) - DEBUG(dbgs() << "physreg "); + dbgs() << "physreg "; if (SwapVector[EntryIdx].MentionsPartialVR) - DEBUG(dbgs() << "partialreg "); + dbgs() << "partialreg "; if (SwapVector[EntryIdx].IsSwappable) { - DEBUG(dbgs() << "swappable "); + dbgs() << "swappable "; switch(SwapVector[EntryIdx].SpecialHandling) { default: - DEBUG(dbgs() << "special:**unknown**"); + dbgs() << "special:**unknown**"; break; case SH_NONE: break; case SH_EXTRACT: - DEBUG(dbgs() << "special:extract "); + dbgs() << "special:extract "; break; case SH_INSERT: - DEBUG(dbgs() << "special:insert "); + dbgs() << "special:insert "; break; case SH_NOSWAP_LD: - DEBUG(dbgs() << "special:load "); + dbgs() << "special:load "; break; case SH_NOSWAP_ST: - DEBUG(dbgs() << "special:store "); + dbgs() << "special:store "; break; case SH_SPLAT: - DEBUG(dbgs() << "special:splat "); + dbgs() << "special:splat "; break; case SH_XXPERMDI: - DEBUG(dbgs() << "special:xxpermdi "); + dbgs() << "special:xxpermdi "; break; case SH_COPYWIDEN: - DEBUG(dbgs() << "special:copywiden "); + dbgs() << "special:copywiden "; break; } } if (SwapVector[EntryIdx].WebRejected) - DEBUG(dbgs() << "rejected "); + dbgs() << "rejected "; if (SwapVector[EntryIdx].WillRemove) - DEBUG(dbgs() << "remove "); + dbgs() << "remove "; - DEBUG(dbgs() << "\n"); + dbgs() << "\n"; // For no-asserts builds. (void)MI; (void)ID; } - DEBUG(dbgs() << "\n"); + dbgs() << "\n"; } +#endif } // end default namespace |