diff options
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC')
49 files changed, 2941 insertions, 1578 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 56307a84f2e5..8b3480f772e9 100644 --- a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -21,7 +21,6 @@ #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" @@ -31,169 +30,7 @@ using namespace llvm; -static const MCPhysReg RRegs[32] = { - PPC::R0, PPC::R1, PPC::R2, PPC::R3, - PPC::R4, PPC::R5, PPC::R6, PPC::R7, - PPC::R8, PPC::R9, PPC::R10, PPC::R11, - PPC::R12, PPC::R13, PPC::R14, PPC::R15, - PPC::R16, PPC::R17, PPC::R18, PPC::R19, - PPC::R20, PPC::R21, PPC::R22, PPC::R23, - PPC::R24, PPC::R25, PPC::R26, PPC::R27, - PPC::R28, PPC::R29, PPC::R30, PPC::R31 -}; -static const MCPhysReg RRegsNoR0[32] = { - PPC::ZERO, - PPC::R1, PPC::R2, PPC::R3, - PPC::R4, PPC::R5, PPC::R6, PPC::R7, - PPC::R8, PPC::R9, PPC::R10, PPC::R11, - PPC::R12, PPC::R13, PPC::R14, PPC::R15, - PPC::R16, PPC::R17, PPC::R18, PPC::R19, - PPC::R20, PPC::R21, PPC::R22, PPC::R23, - PPC::R24, PPC::R25, PPC::R26, PPC::R27, - PPC::R28, PPC::R29, PPC::R30, PPC::R31 -}; -static const MCPhysReg XRegs[32] = { - PPC::X0, PPC::X1, PPC::X2, PPC::X3, - PPC::X4, PPC::X5, PPC::X6, PPC::X7, - PPC::X8, PPC::X9, PPC::X10, PPC::X11, - PPC::X12, PPC::X13, PPC::X14, PPC::X15, - PPC::X16, PPC::X17, PPC::X18, PPC::X19, - PPC::X20, PPC::X21, PPC::X22, PPC::X23, - PPC::X24, PPC::X25, PPC::X26, PPC::X27, - PPC::X28, PPC::X29, PPC::X30, PPC::X31 -}; -static const MCPhysReg XRegsNoX0[32] = { - PPC::ZERO8, - PPC::X1, PPC::X2, PPC::X3, - PPC::X4, PPC::X5, PPC::X6, PPC::X7, - PPC::X8, PPC::X9, PPC::X10, PPC::X11, - PPC::X12, PPC::X13, PPC::X14, PPC::X15, - PPC::X16, PPC::X17, PPC::X18, PPC::X19, - 
PPC::X20, PPC::X21, PPC::X22, PPC::X23, - PPC::X24, PPC::X25, PPC::X26, PPC::X27, - PPC::X28, PPC::X29, PPC::X30, PPC::X31 -}; -static const MCPhysReg FRegs[32] = { - PPC::F0, PPC::F1, PPC::F2, PPC::F3, - PPC::F4, PPC::F5, PPC::F6, PPC::F7, - PPC::F8, PPC::F9, PPC::F10, PPC::F11, - PPC::F12, PPC::F13, PPC::F14, PPC::F15, - PPC::F16, PPC::F17, PPC::F18, PPC::F19, - PPC::F20, PPC::F21, PPC::F22, PPC::F23, - PPC::F24, PPC::F25, PPC::F26, PPC::F27, - PPC::F28, PPC::F29, PPC::F30, PPC::F31 -}; -static const MCPhysReg SPERegs[32] = { - PPC::S0, PPC::S1, PPC::S2, PPC::S3, - PPC::S4, PPC::S5, PPC::S6, PPC::S7, - PPC::S8, PPC::S9, PPC::S10, PPC::S11, - PPC::S12, PPC::S13, PPC::S14, PPC::S15, - PPC::S16, PPC::S17, PPC::S18, PPC::S19, - PPC::S20, PPC::S21, PPC::S22, PPC::S23, - PPC::S24, PPC::S25, PPC::S26, PPC::S27, - PPC::S28, PPC::S29, PPC::S30, PPC::S31 -}; -static const MCPhysReg VFRegs[32] = { - PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, - PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, - PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, - PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, - PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, - PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, - PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, - PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 -}; -static const MCPhysReg VRegs[32] = { - PPC::V0, PPC::V1, PPC::V2, PPC::V3, - PPC::V4, PPC::V5, PPC::V6, PPC::V7, - PPC::V8, PPC::V9, PPC::V10, PPC::V11, - PPC::V12, PPC::V13, PPC::V14, PPC::V15, - PPC::V16, PPC::V17, PPC::V18, PPC::V19, - PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, - PPC::V28, PPC::V29, PPC::V30, PPC::V31 -}; -static const MCPhysReg VSRegs[64] = { - PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3, - PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7, - PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11, - PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15, - PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19, - PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23, - PPC::VSL24, PPC::VSL25, PPC::VSL26, 
PPC::VSL27, - PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31, - - PPC::V0, PPC::V1, PPC::V2, PPC::V3, - PPC::V4, PPC::V5, PPC::V6, PPC::V7, - PPC::V8, PPC::V9, PPC::V10, PPC::V11, - PPC::V12, PPC::V13, PPC::V14, PPC::V15, - PPC::V16, PPC::V17, PPC::V18, PPC::V19, - PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, - PPC::V28, PPC::V29, PPC::V30, PPC::V31 -}; -static const MCPhysReg VSFRegs[64] = { - PPC::F0, PPC::F1, PPC::F2, PPC::F3, - PPC::F4, PPC::F5, PPC::F6, PPC::F7, - PPC::F8, PPC::F9, PPC::F10, PPC::F11, - PPC::F12, PPC::F13, PPC::F14, PPC::F15, - PPC::F16, PPC::F17, PPC::F18, PPC::F19, - PPC::F20, PPC::F21, PPC::F22, PPC::F23, - PPC::F24, PPC::F25, PPC::F26, PPC::F27, - PPC::F28, PPC::F29, PPC::F30, PPC::F31, - - PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, - PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, - PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, - PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, - PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, - PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, - PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, - PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 -}; -static const MCPhysReg VSSRegs[64] = { - PPC::F0, PPC::F1, PPC::F2, PPC::F3, - PPC::F4, PPC::F5, PPC::F6, PPC::F7, - PPC::F8, PPC::F9, PPC::F10, PPC::F11, - PPC::F12, PPC::F13, PPC::F14, PPC::F15, - PPC::F16, PPC::F17, PPC::F18, PPC::F19, - PPC::F20, PPC::F21, PPC::F22, PPC::F23, - PPC::F24, PPC::F25, PPC::F26, PPC::F27, - PPC::F28, PPC::F29, PPC::F30, PPC::F31, - - PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, - PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, - PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, - PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, - PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, - PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, - PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, - PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 -}; -static unsigned QFRegs[32] = { - PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3, - PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, - PPC::QF8, PPC::QF9, PPC::QF10, 
PPC::QF11, - PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15, - PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19, - PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23, - PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27, - PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31 -}; -static const MCPhysReg CRBITRegs[32] = { - PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, - PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, - PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, - PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, - PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, - PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, - PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, - PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN -}; -static const MCPhysReg CRRegs[8] = { - PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, - PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7 -}; +DEFINE_PPC_REGCLASSES; // Evaluate an expression containing condition register // or condition register field symbols. Returns positive diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index db01271b87e1..26869f250823 100644 --- a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" @@ -17,6 +17,8 @@ using namespace llvm; +DEFINE_PPC_REGCLASSES; + #define DEBUG_TYPE "ppc-disassembler" typedef MCDisassembler::DecodeStatus DecodeStatus; @@ -62,184 +64,9 @@ extern "C" void LLVMInitializePowerPCDisassembler() { // FIXME: These can be generated by TableGen from the existing register // encoding values! 
-static const unsigned CRRegs[] = { - PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, - PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7 -}; - -static const unsigned CRBITRegs[] = { - PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, - PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, - PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, - PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, - PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, - PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, - PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, - PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN -}; - -static const unsigned FRegs[] = { - PPC::F0, PPC::F1, PPC::F2, PPC::F3, - PPC::F4, PPC::F5, PPC::F6, PPC::F7, - PPC::F8, PPC::F9, PPC::F10, PPC::F11, - PPC::F12, PPC::F13, PPC::F14, PPC::F15, - PPC::F16, PPC::F17, PPC::F18, PPC::F19, - PPC::F20, PPC::F21, PPC::F22, PPC::F23, - PPC::F24, PPC::F25, PPC::F26, PPC::F27, - PPC::F28, PPC::F29, PPC::F30, PPC::F31 -}; - -static const unsigned VFRegs[] = { - PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, - PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, - PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, - PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, - PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, - PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, - PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, - PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 -}; - -static const unsigned VRegs[] = { - PPC::V0, PPC::V1, PPC::V2, PPC::V3, - PPC::V4, PPC::V5, PPC::V6, PPC::V7, - PPC::V8, PPC::V9, PPC::V10, PPC::V11, - PPC::V12, PPC::V13, PPC::V14, PPC::V15, - PPC::V16, PPC::V17, PPC::V18, PPC::V19, - PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, - PPC::V28, PPC::V29, PPC::V30, PPC::V31 -}; - -static const unsigned VSRegs[] = { - PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3, - PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7, - PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11, - PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15, - PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19, - PPC::VSL20, 
PPC::VSL21, PPC::VSL22, PPC::VSL23, - PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27, - PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31, - - PPC::V0, PPC::V1, PPC::V2, PPC::V3, - PPC::V4, PPC::V5, PPC::V6, PPC::V7, - PPC::V8, PPC::V9, PPC::V10, PPC::V11, - PPC::V12, PPC::V13, PPC::V14, PPC::V15, - PPC::V16, PPC::V17, PPC::V18, PPC::V19, - PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, - PPC::V28, PPC::V29, PPC::V30, PPC::V31 -}; - -static const unsigned VSFRegs[] = { - PPC::F0, PPC::F1, PPC::F2, PPC::F3, - PPC::F4, PPC::F5, PPC::F6, PPC::F7, - PPC::F8, PPC::F9, PPC::F10, PPC::F11, - PPC::F12, PPC::F13, PPC::F14, PPC::F15, - PPC::F16, PPC::F17, PPC::F18, PPC::F19, - PPC::F20, PPC::F21, PPC::F22, PPC::F23, - PPC::F24, PPC::F25, PPC::F26, PPC::F27, - PPC::F28, PPC::F29, PPC::F30, PPC::F31, - - PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, - PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, - PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, - PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, - PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, - PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, - PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, - PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 -}; - -static const unsigned VSSRegs[] = { - PPC::F0, PPC::F1, PPC::F2, PPC::F3, - PPC::F4, PPC::F5, PPC::F6, PPC::F7, - PPC::F8, PPC::F9, PPC::F10, PPC::F11, - PPC::F12, PPC::F13, PPC::F14, PPC::F15, - PPC::F16, PPC::F17, PPC::F18, PPC::F19, - PPC::F20, PPC::F21, PPC::F22, PPC::F23, - PPC::F24, PPC::F25, PPC::F26, PPC::F27, - PPC::F28, PPC::F29, PPC::F30, PPC::F31, - - PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, - PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, - PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, - PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, - PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, - PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, - PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, - PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 -}; - -static const unsigned GPRegs[] = { - PPC::R0, PPC::R1, PPC::R2, PPC::R3, 
- PPC::R4, PPC::R5, PPC::R6, PPC::R7, - PPC::R8, PPC::R9, PPC::R10, PPC::R11, - PPC::R12, PPC::R13, PPC::R14, PPC::R15, - PPC::R16, PPC::R17, PPC::R18, PPC::R19, - PPC::R20, PPC::R21, PPC::R22, PPC::R23, - PPC::R24, PPC::R25, PPC::R26, PPC::R27, - PPC::R28, PPC::R29, PPC::R30, PPC::R31 -}; - -static const unsigned GP0Regs[] = { - PPC::ZERO, PPC::R1, PPC::R2, PPC::R3, - PPC::R4, PPC::R5, PPC::R6, PPC::R7, - PPC::R8, PPC::R9, PPC::R10, PPC::R11, - PPC::R12, PPC::R13, PPC::R14, PPC::R15, - PPC::R16, PPC::R17, PPC::R18, PPC::R19, - PPC::R20, PPC::R21, PPC::R22, PPC::R23, - PPC::R24, PPC::R25, PPC::R26, PPC::R27, - PPC::R28, PPC::R29, PPC::R30, PPC::R31 -}; - -static const unsigned G8Regs[] = { - PPC::X0, PPC::X1, PPC::X2, PPC::X3, - PPC::X4, PPC::X5, PPC::X6, PPC::X7, - PPC::X8, PPC::X9, PPC::X10, PPC::X11, - PPC::X12, PPC::X13, PPC::X14, PPC::X15, - PPC::X16, PPC::X17, PPC::X18, PPC::X19, - PPC::X20, PPC::X21, PPC::X22, PPC::X23, - PPC::X24, PPC::X25, PPC::X26, PPC::X27, - PPC::X28, PPC::X29, PPC::X30, PPC::X31 -}; - -static const unsigned G80Regs[] = { - PPC::ZERO8, PPC::X1, PPC::X2, PPC::X3, - PPC::X4, PPC::X5, PPC::X6, PPC::X7, - PPC::X8, PPC::X9, PPC::X10, PPC::X11, - PPC::X12, PPC::X13, PPC::X14, PPC::X15, - PPC::X16, PPC::X17, PPC::X18, PPC::X19, - PPC::X20, PPC::X21, PPC::X22, PPC::X23, - PPC::X24, PPC::X25, PPC::X26, PPC::X27, - PPC::X28, PPC::X29, PPC::X30, PPC::X31 -}; - -static const unsigned QFRegs[] = { - PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3, - PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, - PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, - PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15, - PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19, - PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23, - PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27, - PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31 -}; - -static const unsigned SPERegs[] = { - PPC::S0, PPC::S1, PPC::S2, PPC::S3, - PPC::S4, PPC::S5, PPC::S6, PPC::S7, - PPC::S8, PPC::S9, PPC::S10, PPC::S11, - PPC::S12, PPC::S13, PPC::S14, PPC::S15, - 
PPC::S16, PPC::S17, PPC::S18, PPC::S19, - PPC::S20, PPC::S21, PPC::S22, PPC::S23, - PPC::S24, PPC::S25, PPC::S26, PPC::S27, - PPC::S28, PPC::S29, PPC::S30, PPC::S31 -}; - template <std::size_t N> static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, - const unsigned (&Regs)[N]) { + const MCPhysReg (&Regs)[N]) { assert(RegNo < N && "Invalid register number"); Inst.addOperand(MCOperand::createReg(Regs[RegNo])); return MCDisassembler::Success; @@ -308,25 +135,25 @@ static DecodeStatus DecodeVSSRCRegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - return decodeRegisterClass(Inst, RegNo, GPRegs); + return decodeRegisterClass(Inst, RegNo, RRegs); } static DecodeStatus DecodeGPRC_NOR0RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - return decodeRegisterClass(Inst, RegNo, GP0Regs); + return decodeRegisterClass(Inst, RegNo, RRegsNoR0); } static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - return decodeRegisterClass(Inst, RegNo, G8Regs); + return decodeRegisterClass(Inst, RegNo, XRegs); } static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - return decodeRegisterClass(Inst, RegNo, G80Regs); + return decodeRegisterClass(Inst, RegNo, XRegsNoX0); } #define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass @@ -341,7 +168,7 @@ static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeSPE4RCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - return decodeRegisterClass(Inst, RegNo, GPRegs); + return decodeRegisterClass(Inst, RegNo, RRegs); } static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo, @@ -388,19 +215,19 @@ static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm, case 
PPC::LFSU: case PPC::LFDU: // Add the tied output operand. - Inst.addOperand(MCOperand::createReg(GP0Regs[Base])); + Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base])); break; case PPC::STBU: case PPC::STHU: case PPC::STWU: case PPC::STFSU: case PPC::STFDU: - Inst.insert(Inst.begin(), MCOperand::createReg(GP0Regs[Base])); + Inst.insert(Inst.begin(), MCOperand::createReg(RRegsNoR0[Base])); break; } Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp))); - Inst.addOperand(MCOperand::createReg(GP0Regs[Base])); + Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base])); return MCDisassembler::Success; } @@ -416,12 +243,12 @@ static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm, if (Inst.getOpcode() == PPC::LDU) // Add the tied output operand. - Inst.addOperand(MCOperand::createReg(GP0Regs[Base])); + Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base])); else if (Inst.getOpcode() == PPC::STDU) - Inst.insert(Inst.begin(), MCOperand::createReg(GP0Regs[Base])); + Inst.insert(Inst.begin(), MCOperand::createReg(RRegsNoR0[Base])); Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 2))); - Inst.addOperand(MCOperand::createReg(GP0Regs[Base])); + Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base])); return MCDisassembler::Success; } @@ -436,7 +263,7 @@ static DecodeStatus decodeMemRIX16Operands(MCInst &Inst, uint64_t Imm, assert(Base < 32 && "Invalid base register"); Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 4))); - Inst.addOperand(MCOperand::createReg(GP0Regs[Base])); + Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base])); return MCDisassembler::Success; } @@ -451,7 +278,7 @@ static DecodeStatus decodeSPE8Operands(MCInst &Inst, uint64_t Imm, assert(Base < 32 && "Invalid base register"); Inst.addOperand(MCOperand::createImm(Disp << 3)); - Inst.addOperand(MCOperand::createReg(GP0Regs[Base])); + Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base])); return MCDisassembler::Success; } @@ -466,7 +293,7 @@ static 
DecodeStatus decodeSPE4Operands(MCInst &Inst, uint64_t Imm, assert(Base < 32 && "Invalid base register"); Inst.addOperand(MCOperand::createImm(Disp << 2)); - Inst.addOperand(MCOperand::createReg(GP0Regs[Base])); + Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base])); return MCDisassembler::Success; } @@ -481,7 +308,7 @@ static DecodeStatus decodeSPE2Operands(MCInst &Inst, uint64_t Imm, assert(Base < 32 && "Invalid base register"); Inst.addOperand(MCOperand::createImm(Disp << 1)); - Inst.addOperand(MCOperand::createReg(GP0Regs[Base])); + Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base])); return MCDisassembler::Success; } diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index fd7f81591426..fc29e4effbb1 100644 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -499,43 +499,14 @@ bool PPCInstPrinter::showRegistersWithPrefix() const { return TT.isOSDarwin() || FullRegNamesWithPercent || FullRegNames; } -/// stripRegisterPrefix - This method strips the character prefix from a -/// register name so that only the number is left. -static const char *stripRegisterPrefix(const char *RegName) { - switch (RegName[0]) { - case 'r': - case 'f': - case 'q': // for QPX - case 'v': - if (RegName[1] == 's') - return RegName + 2; - return RegName + 1; - case 'c': if (RegName[1] == 'r') return RegName + 2; - } - - return RegName; -} - void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { unsigned Reg = Op.getReg(); - - // There are VSX instructions that use VSX register numbering (vs0 - vs63) - // as well as those that use VMX register numbering (v0 - v31 which - // correspond to vs32 - vs63). If we have an instruction that uses VSX - // numbering, we need to convert the VMX registers to VSX registers. 
- // Namely, we print 32-63 when the instruction operates on one of the - // VMX registers. - // (Please synchronize with PPCAsmPrinter::printOperand) - if ((MII.get(MI->getOpcode()).TSFlags & PPCII::UseVSXReg) && - !ShowVSRNumsAsVR) { - if (PPCInstrInfo::isVRRegister(Reg)) - Reg = PPC::VSX32 + (Reg - PPC::V0); - else if (PPCInstrInfo::isVFRegister(Reg)) - Reg = PPC::VSX32 + (Reg - PPC::VF0); - } + if (!ShowVSRNumsAsVR) + Reg = PPCInstrInfo::getRegNumForOperand(MII.get(MI->getOpcode()), + Reg, OpNo); const char *RegName; RegName = getVerboseConditionRegName(Reg, MRI.getEncodingValue(Reg)); @@ -544,7 +515,7 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (showRegistersWithPercentPrefix(RegName)) O << "%"; if (!showRegistersWithPrefix()) - RegName = stripRegisterPrefix(RegName); + RegName = PPCRegisterInfo::stripRegisterPrefix(RegName); O << RegName; return; diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 57bda1403c62..8c15ade6f9c4 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -13,18 +13,13 @@ #include "MCTargetDesc/PPCFixupKinds.h" #include "PPCInstrInfo.h" +#include "PPCMCCodeEmitter.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixup.h" -#include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" @@ -39,117 +34,6 @@ using namespace llvm; STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); -namespace { - -class PPCMCCodeEmitter : public 
MCCodeEmitter { - const MCInstrInfo &MCII; - const MCContext &CTX; - bool IsLittleEndian; - -public: - PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) - : MCII(mcii), CTX(ctx), - IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {} - PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete; - void operator=(const PPCMCCodeEmitter &) = delete; - ~PPCMCCodeEmitter() override = default; - - unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getMemRIX16Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getSPE4DisEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getSPE2DisEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const 
MCSubtargetInfo &STI) const; - unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getMachineOpValue - Return binary encoding of operand. If the machine - /// operand requires relocation, record the relocation and return zero. - unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - // getBinaryCodeForInstr - TableGen'erated function for getting the - // binary encoding for an instruction. - uint64_t getBinaryCodeForInstr(const MCInst &MI, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - void encodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const override { - verifyInstructionPredicates(MI, - computeAvailableFeatures(STI.getFeatureBits())); - - unsigned Opcode = MI.getOpcode(); - const MCInstrDesc &Desc = MCII.get(Opcode); - - uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); - - // Output the constant in big/little endian byte order. - unsigned Size = Desc.getSize(); - support::endianness E = IsLittleEndian ? support::little : support::big; - switch (Size) { - case 0: - break; - case 4: - support::endian::write<uint32_t>(OS, Bits, E); - break; - case 8: - // If we emit a pair of instructions, the first one is - // always in the top 32 bits, even on little-endian. - support::endian::write<uint32_t>(OS, Bits >> 32, E); - support::endian::write<uint32_t>(OS, Bits, E); - break; - default: - llvm_unreachable("Invalid instruction size"); - } - - ++MCNumEmitted; // Keep track of the # of mi's emitted. 
- } - -private: - uint64_t computeAvailableFeatures(const FeatureBitset &FB) const; - void verifyInstructionPredicates(const MCInst &MI, - uint64_t AvailableFeatures) const; -}; - -} // end anonymous namespace - MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx) { @@ -264,10 +148,16 @@ unsigned PPCMCCodeEmitter::getMemRIX16Encoding(const MCInst &MI, unsigned OpNo, unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 12; const MCOperand &MO = MI.getOperand(OpNo); - assert(MO.isImm() && !(MO.getImm() % 16) && - "Expecting an immediate that is a multiple of 16"); + if (MO.isImm()) { + assert(!(MO.getImm() % 16) && + "Expecting an immediate that is a multiple of 16"); + return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits; + } - return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits; + // Otherwise add a fixup for the displacement field. + Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_half16ds)); + return RegBits; } unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo, @@ -354,6 +244,20 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo, return 0x80 >> CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); } +// Get the index for this operand in this instruction. This is needed for +// computing the register number in PPCInstrInfo::getRegNumForOperand() for +// any instructions that use a different numbering scheme for registers in +// different operands. +static unsigned getOpIdxForMO(const MCInst &MI, const MCOperand &MO) { + for (unsigned i = 0; i < MI.getNumOperands(); i++) { + const MCOperand &Op = MI.getOperand(i); + if (&Op == &MO) + return i; + } + llvm_unreachable("This operand is not part of this instruction"); + return ~0U; // Silence any warnings about no return. 
+} + unsigned PPCMCCodeEmitter:: getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups, @@ -364,14 +268,11 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 && MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); - unsigned Reg = MO.getReg(); - unsigned Encode = CTX.getRegisterInfo()->getEncodingValue(Reg); - - if ((MCII.get(MI.getOpcode()).TSFlags & PPCII::UseVSXReg)) - if (PPCInstrInfo::isVRRegister(Reg)) - Encode += 32; - - return Encode; + unsigned OpNo = getOpIdxForMO(MI, MO); + unsigned Reg = + PPCInstrInfo::getRegNumForOperand(MCII.get(MI.getOpcode()), + MO.getReg(), OpNo); + return CTX.getRegisterInfo()->getEncodingValue(Reg); } assert(MO.isImm() && @@ -379,5 +280,42 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, return MO.getImm(); } +void PPCMCCodeEmitter::encodeInstruction( + const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + verifyInstructionPredicates(MI, + computeAvailableFeatures(STI.getFeatureBits())); + + uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); + + // Output the constant in big/little endian byte order. + unsigned Size = getInstSizeInBytes(MI); + support::endianness E = IsLittleEndian ? support::little : support::big; + switch (Size) { + case 0: + break; + case 4: + support::endian::write<uint32_t>(OS, Bits, E); + break; + case 8: + // If we emit a pair of instructions, the first one is + // always in the top 32 bits, even on little-endian. + support::endian::write<uint32_t>(OS, Bits >> 32, E); + support::endian::write<uint32_t>(OS, Bits, E); + break; + default: + llvm_unreachable("Invalid instruction size"); + } + + ++MCNumEmitted; // Keep track of the # of mi's emitted. +} + +// Get the number of bytes used to encode the given MCInst. 
+unsigned PPCMCCodeEmitter::getInstSizeInBytes(const MCInst &MI) const { + unsigned Opcode = MI.getOpcode(); + const MCInstrDesc &Desc = MCII.get(Opcode); + return Desc.getSize(); +} + #define ENABLE_INSTR_PREDICATE_VERIFIER #include "PPCGenMCCodeEmitter.inc" diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h new file mode 100644 index 000000000000..a4bcff4b9450 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h @@ -0,0 +1,109 @@ +//===-- PPCMCCodeEmitter.h - Convert PPC code to machine code -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the PPCMCCodeEmitter class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_PPC_MCCODEEMITTER_PPCCODEEMITTER_H +#define LLVM_LIB_TARGET_PPC_MCCODEEMITTER_PPCCODEEMITTER_H + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" + +namespace llvm { + +class PPCMCCodeEmitter : public MCCodeEmitter { + const MCInstrInfo &MCII; + const MCContext &CTX; + bool IsLittleEndian; + +public: + PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) + : MCII(mcii), CTX(ctx), + IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {} + PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete; + void operator=(const PPCMCCodeEmitter &) = delete; + ~PPCMCCodeEmitter() override = default; + + unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + unsigned getCondBrEncoding(const MCInst &MI, unsigned 
OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + unsigned getMemRIX16Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + unsigned getSPE4DisEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + unsigned getSPE2DisEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. 
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + // getBinaryCodeForInstr - TableGen'erated function for getting the + // binary encoding for an instruction. + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + void encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; + + // Get the number of bytes used to encode the given MCInst. + unsigned getInstSizeInBytes(const MCInst &MI) const; + +private: + uint64_t computeAvailableFeatures(const FeatureBitset &FB) const; + void verifyInstructionPredicates(const MCInst &MI, + uint64_t AvailableFeatures) const; +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_PPC_MCCODEEMITTER_PPCCODEEMITTER_H diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 316fd2ccf358..d6e450cba0d7 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -17,6 +17,7 @@ // GCC #defines PPC on Linux but we use it as our namespace name #undef PPC +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/MathExtras.h" #include <cstdint> #include <memory> @@ -104,4 +105,63 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { #define GET_SUBTARGETINFO_ENUM #include "PPCGenSubtargetInfo.inc" +#define PPC_REGS0_31(X) \ + { \ + X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \ + X##12, X##13, X##14, X##15, X##16, X##17, X##18, X##19, X##20, X##21, \ + X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31 \ + } + +#define PPC_REGS_NO0_31(Z, X) \ + { \ + Z, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \ + X##12, X##13, X##14, X##15, 
X##16, X##17, X##18, X##19, X##20, X##21, \ + X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31 \ + } + +#define PPC_REGS_LO_HI(LO, HI) \ + { \ + LO##0, LO##1, LO##2, LO##3, LO##4, LO##5, LO##6, LO##7, LO##8, LO##9, \ + LO##10, LO##11, LO##12, LO##13, LO##14, LO##15, LO##16, LO##17, \ + LO##18, LO##19, LO##20, LO##21, LO##22, LO##23, LO##24, LO##25, \ + LO##26, LO##27, LO##28, LO##29, LO##30, LO##31, HI##0, HI##1, HI##2, \ + HI##3, HI##4, HI##5, HI##6, HI##7, HI##8, HI##9, HI##10, HI##11, \ + HI##12, HI##13, HI##14, HI##15, HI##16, HI##17, HI##18, HI##19, \ + HI##20, HI##21, HI##22, HI##23, HI##24, HI##25, HI##26, HI##27, \ + HI##28, HI##29, HI##30, HI##31 \ + } + +using llvm::MCPhysReg; + +#define DEFINE_PPC_REGCLASSES \ + static const MCPhysReg RRegs[32] = PPC_REGS0_31(PPC::R); \ + static const MCPhysReg XRegs[32] = PPC_REGS0_31(PPC::X); \ + static const MCPhysReg FRegs[32] = PPC_REGS0_31(PPC::F); \ + static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \ + static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \ + static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \ + static const MCPhysReg QFRegs[32] = PPC_REGS0_31(PPC::QF); \ + static const MCPhysReg RRegsNoR0[32] = \ + PPC_REGS_NO0_31(PPC::ZERO, PPC::R); \ + static const MCPhysReg XRegsNoX0[32] = \ + PPC_REGS_NO0_31(PPC::ZERO8, PPC::X); \ + static const MCPhysReg VSRegs[64] = \ + PPC_REGS_LO_HI(PPC::VSL, PPC::V); \ + static const MCPhysReg VSFRegs[64] = \ + PPC_REGS_LO_HI(PPC::F, PPC::VF); \ + static const MCPhysReg VSSRegs[64] = \ + PPC_REGS_LO_HI(PPC::F, PPC::VF); \ + static const MCPhysReg CRBITRegs[32] = { \ + PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, \ + PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, \ + PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, \ + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, \ + PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, \ + PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, \ + PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, \ + 
PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN}; \ + static const MCPhysReg CRRegs[8] = { \ + PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, \ + PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7} + #endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H diff --git a/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td b/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td index c6cbb9037ede..17c37964c562 100644 --- a/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -111,11 +111,11 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C], (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"), (instregex "POPCNT(D|W)$"), (instregex "CMPB(8)?$"), + (instregex "SETB(8)?$"), XSTDIVDP, XSTSQRTDP, XSXSIGDP, XSCVSPDPN, - SETB, BPERMD )>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td index 80ad4962a20f..98e6e98e6974 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.td +++ b/contrib/llvm/lib/Target/PowerPC/PPC.td @@ -305,11 +305,11 @@ def : Processor<"generic", G3Itineraries, [Directive32, FeatureHardFloat, FeatureMFTB]>; def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL, FeatureFRES, FeatureFRSQRTE, - FeatureICBT, FeatureBookE, + FeatureICBT, FeatureBookE, FeatureMSYNC, FeatureMFTB]>; def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL, FeatureFRES, FeatureFRSQRTE, - FeatureICBT, FeatureBookE, + FeatureICBT, FeatureBookE, FeatureMSYNC, FeatureMFTB]>; def : Processor<"601", G3Itineraries, [Directive601, FeatureFPU]>; def : Processor<"602", G3Itineraries, [Directive602, FeatureFPU, @@ -348,7 +348,7 @@ def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec, FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec, - FeatureFRES, FeatureFRSQRTE, + FeatureFRES, FeatureFRSQRTE, FeatureMFTB]>; def : ProcessorModel<"970", G5Model, @@ -369,11 +369,11 @@ def : ProcessorModel<"e500", 
PPCE500Model, FeatureISEL, FeatureMFTB]>; def : ProcessorModel<"e500mc", PPCE500mcModel, [DirectiveE500mc, - FeatureSTFIWX, FeatureICBT, FeatureBookE, + FeatureSTFIWX, FeatureICBT, FeatureBookE, FeatureISEL, FeatureMFTB]>; def : ProcessorModel<"e5500", PPCE5500Model, [DirectiveE5500, FeatureMFOCRF, Feature64Bit, - FeatureSTFIWX, FeatureICBT, FeatureBookE, + FeatureSTFIWX, FeatureICBT, FeatureBookE, FeatureISEL, FeatureMFTB]>; def : ProcessorModel<"a2", PPCA2Model, [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF, @@ -428,7 +428,7 @@ def : ProcessorModel<"pwr6x", G5Model, FeatureMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>; -def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>; +def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>; def : Processor<"ppc", G3Itineraries, [Directive32, FeatureHardFloat, FeatureMFTB]>; def : Processor<"ppc32", G3Itineraries, [Directive32, FeatureHardFloat, @@ -478,3 +478,9 @@ def PPC : Target { let AssemblyParserVariants = [PPCAsmParserVariant]; let AllowRegisterRenaming = 1; } + +//===----------------------------------------------------------------------===// +// Pfm Counters +//===----------------------------------------------------------------------===// + +include "PPCPfmCounters.td" diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index a9da64cc216f..04aa3c9b1e22 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -158,23 +158,6 @@ public: } // end anonymous namespace -/// stripRegisterPrefix - This method strips the character prefix from a -/// register name so that only the number is left. Used by for linux asm. 
-static const char *stripRegisterPrefix(const char *RegName) { - switch (RegName[0]) { - case 'r': - case 'f': - case 'q': // for QPX - case 'v': - if (RegName[1] == 's') - return RegName + 2; - return RegName + 1; - case 'c': if (RegName[1] == 'r') return RegName + 2; - } - - return RegName; -} - void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { const DataLayout &DL = getDataLayout(); @@ -182,27 +165,15 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, switch (MO.getType()) { case MachineOperand::MO_Register: { - unsigned Reg = MO.getReg(); - - // There are VSX instructions that use VSX register numbering (vs0 - vs63) - // as well as those that use VMX register numbering (v0 - v31 which - // correspond to vs32 - vs63). If we have an instruction that uses VSX - // numbering, we need to convert the VMX registers to VSX registers. - // Namely, we print 32-63 when the instruction operates on one of the - // VMX registers. - // (Please synchronize with PPCInstPrinter::printOperand) - if (MI->getDesc().TSFlags & PPCII::UseVSXReg) { - if (PPCInstrInfo::isVRRegister(Reg)) - Reg = PPC::VSX32 + (Reg - PPC::V0); - else if (PPCInstrInfo::isVFRegister(Reg)) - Reg = PPC::VSX32 + (Reg - PPC::VF0); - } + unsigned Reg = PPCInstrInfo::getRegNumForOperand(MI->getDesc(), + MO.getReg(), OpNo); + const char *RegName = PPCInstPrinter::getRegisterName(Reg); // Linux assembler (Others?) does not take register mnemonics. // FIXME - What about special registers used in mfspr/mtspr? if (!Subtarget->isDarwin()) - RegName = stripRegisterPrefix(RegName); + RegName = PPCRegisterInfo::stripRegisterPrefix(RegName); O << RegName; return; } @@ -279,6 +250,21 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, if (MI->getOperand(OpNo).isImm()) O << "i"; return false; + case 'x': + if(!MI->getOperand(OpNo).isReg()) + return true; + // This operand uses VSX numbering. 
+ // If the operand is a VMX register, convert it to a VSX register. + unsigned Reg = MI->getOperand(OpNo).getReg(); + if (PPCInstrInfo::isVRRegister(Reg)) + Reg = PPC::VSX32 + (Reg - PPC::V0); + else if (PPCInstrInfo::isVFRegister(Reg)) + Reg = PPC::VSX32 + (Reg - PPC::VF0); + const char *RegName; + RegName = PPCInstPrinter::getRegisterName(Reg); + RegName = PPCRegisterInfo::stripRegisterPrefix(RegName); + O << RegName; + return false; } } @@ -303,7 +289,7 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, { const char *RegName = "r0"; if (!Subtarget->isDarwin()) - RegName = stripRegisterPrefix(RegName); + RegName = PPCRegisterInfo::stripRegisterPrefix(RegName); O << RegName << ", "; printOperand(MI, OpNo, O); return false; @@ -341,7 +327,7 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { } void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) { - SM.serializeToStackMapSection(); + emitStackMaps(SM); } void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) { diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td index 12c581023234..22842d516e7d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -338,7 +338,7 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>; // coldcc calling convection marks most registers as non-volatile. // Do not include r1 since the stack pointer is never considered a CSR. // Do not include r2, since it is the TOC register and is added depending -// on wether or not the function uses the TOC and is a non-leaf. +// on whether or not the function uses the TOC and is a non-leaf. // Do not include r0,r11,r13 as they are optional in functional linkage // and value may be altered by inter-library calls. // Do not include r12 as it is used as a scratch register. 
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp index fe41e1b36a5d..a03e691ef5bb 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp @@ -392,7 +392,7 @@ void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL, // liveness state at the end of MBB (liveOut of MBB) as the liveIn for // NewSuccessor. Otherwise, will cause cyclic dependence. LivePhysRegs LPR(*MF->getSubtarget<PPCSubtarget>().getRegisterInfo()); - SmallVector<std::pair<unsigned, const MachineOperand *>, 2> Clobbers; + SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 2> Clobbers; for (MachineInstr &MI : *MBB) LPR.stepForward(MI, Clobbers); for (auto &LI : LPR) diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp index f212894035db..3b2d92db78b9 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -861,8 +861,20 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, } } + unsigned SrcReg1 = getRegForValue(SrcValue1); + if (SrcReg1 == 0) + return false; + + unsigned SrcReg2 = 0; + if (!UseImm) { + SrcReg2 = getRegForValue(SrcValue2); + if (SrcReg2 == 0) + return false; + } + unsigned CmpOpc; bool NeedsExt = false; + auto RC = MRI.getRegClass(SrcReg1); switch (SrcVT.SimpleTy) { default: return false; case MVT::f32: @@ -879,8 +891,15 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, CmpOpc = PPC::EFSCMPGT; break; } - } else + } else { CmpOpc = PPC::FCMPUS; + if (isVSSRCRegClass(RC)) { + unsigned TmpReg = createResultReg(&PPC::F4RCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg1); + SrcReg1 = TmpReg; + } + } break; case MVT::f64: if (HasSPE) { @@ -896,14 +915,17 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const 
Value *SrcValue2, CmpOpc = PPC::EFDCMPGT; break; } - } else + } else if (isVSFRCRegClass(RC)) { + CmpOpc = PPC::XSCMPUDP; + } else { CmpOpc = PPC::FCMPUD; + } break; case MVT::i1: case MVT::i8: case MVT::i16: NeedsExt = true; - // Intentional fall-through. + LLVM_FALLTHROUGH; case MVT::i32: if (!UseImm) CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW; @@ -918,17 +940,6 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, break; } - unsigned SrcReg1 = getRegForValue(SrcValue1); - if (SrcReg1 == 0) - return false; - - unsigned SrcReg2 = 0; - if (!UseImm) { - SrcReg2 = getRegForValue(SrcValue2); - if (SrcReg2 == 0) - return false; - } - if (NeedsExt) { unsigned ExtReg = createResultReg(&PPC::GPRCRegClass); if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt)) @@ -2354,7 +2365,8 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD)) return false; - MI->eraseFromParent(); + MachineBasicBlock::iterator I(MI); + removeDeadCode(I, std::next(I)); return true; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 84dacf396462..8263954994d2 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -17,6 +17,7 @@ #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -28,6 +29,16 @@ using namespace llvm; +#define DEBUG_TYPE "framelowering" +STATISTIC(NumNoNeedForFrame, "Number of functions without frames"); +STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue"); +STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue"); + +static cl::opt<bool> +EnablePEVectorSpills("ppc-enable-pe-vector-spills", + cl::desc("Enable 
spills in prologue to vector registers."), + cl::init(false), cl::Hidden); + /// VRRegNo - Map from a numbered VR register to its enum value. /// static const MCPhysReg VRRegNo[] = { @@ -466,6 +477,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, // Check whether we can skip adjusting the stack pointer (by using red zone) if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { + NumNoNeedForFrame++; // No need for frame if (UpdateMF) MFI.setStackSize(0); @@ -1213,11 +1225,20 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, continue; } - int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + if (CSI[I].isSpilledToReg()) { + unsigned SpilledReg = CSI[I].getDstReg(); + unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( + nullptr, MRI->getDwarfRegNum(Reg, true), + MRI->getDwarfRegNum(SpilledReg, true))); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIRegister); + } else { + int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } } } } @@ -1822,17 +1843,19 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, // Move general register save area spill slots down, taking into account // the size of the Floating-point register save area. 
for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { - int FI = GPRegs[i].getFrameIdx(); - - MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); + if (!GPRegs[i].isSpilledToReg()) { + int FI = GPRegs[i].getFrameIdx(); + MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); + } } // Move general register save area spill slots down, taking into account // the size of the Floating-point register save area. for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { - int FI = G8Regs[i].getFrameIdx(); - - MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); + if (!G8Regs[i].isSpilledToReg()) { + int FI = G8Regs[i].getFrameIdx(); + MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); + } } unsigned MinReg = @@ -1947,6 +1970,64 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, } } +// This function checks if a callee saved gpr can be spilled to a volatile +// vector register. This occurs for leaf functions when the option +// ppc-enable-pe-vector-spills is enabled. If there are any remaining registers +// which were not spilled to vectors, return false so the target independent +// code can handle them by assigning a FrameIdx to a stack slot. +bool PPCFrameLowering::assignCalleeSavedSpillSlots( + MachineFunction &MF, const TargetRegisterInfo *TRI, + std::vector<CalleeSavedInfo> &CSI) const { + + if (CSI.empty()) + return true; // Early exit if no callee saved registers are modified! + + // Early exit if cannot spill gprs to volatile vector registers. + MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) + return false; + + // Build a BitVector of VSRs that can be used for spilling GPRs. 
+ BitVector BVAllocatable = TRI->getAllocatableSet(MF); + BitVector BVCalleeSaved(TRI->getNumRegs()); + const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); + for (unsigned i = 0; CSRegs[i]; ++i) + BVCalleeSaved.set(CSRegs[i]); + + for (unsigned Reg : BVAllocatable.set_bits()) { + // Set to 0 if the register is not a volatile VF/F8 register, or if it is + // used in the function. + if (BVCalleeSaved[Reg] || + (!PPC::F8RCRegClass.contains(Reg) && + !PPC::VFRCRegClass.contains(Reg)) || + (MF.getRegInfo().isPhysRegUsed(Reg))) + BVAllocatable.reset(Reg); + } + + bool AllSpilledToReg = true; + for (auto &CS : CSI) { + if (BVAllocatable.none()) + return false; + + unsigned Reg = CS.getReg(); + if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) { + AllSpilledToReg = false; + continue; + } + + unsigned VolatileVFReg = BVAllocatable.find_first(); + if (VolatileVFReg < BVAllocatable.size()) { + CS.setDstReg(VolatileVFReg); + BVAllocatable.reset(VolatileVFReg); + } else { + AllSpilledToReg = false; + } + } + return AllSpilledToReg; +} + + bool PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -2012,12 +2093,18 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, CSI[i].getFrameIdx())); } } else { - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - // Use !IsLiveIn for the kill flag. - // We do not want to kill registers that are live in this function - // before their use because they will become undefined registers. - TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, - CSI[i].getFrameIdx(), RC, TRI); + if (CSI[i].isSpilledToReg()) { + NumPESpillVSR++; + BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg()) + .addReg(Reg, getKillRegState(true)); + } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + // Use !IsLiveIn for the kill flag. 
+ // We do not want to kill registers that are live in this function + // before their use because they will become undefined registers. + TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, + CSI[i].getFrameIdx(), RC, TRI); + } } } return true; @@ -2157,13 +2244,19 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, CR2Spilled = CR3Spilled = CR4Spilled = false; } - // Default behavior for non-CR saves. - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), - RC, TRI); - assert(I != MBB.begin() && - "loadRegFromStackSlot didn't insert any code!"); + if (CSI[i].isSpilledToReg()) { + DebugLoc DL; + NumPEReloadVSR++; + BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg) + .addReg(CSI[i].getDstReg(), getKillRegState(true)); + } else { + // Default behavior for non-CR saves. + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); + assert(I != MBB.begin() && + "loadRegFromStackSlot didn't insert any code!"); } + } // Insert in reverse order. if (AtStart) diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h index 01c155594c44..69bd1484d6e5 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h @@ -99,6 +99,13 @@ public: MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const override; + /// This function will assign callee saved gprs to volatile vector registers + /// for prologue spills when applicable. It returns false if there are any + /// registers which were not spilled to volatile vector registers. 
+ bool + assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector<CalleeSavedInfo> &CSI) const override; MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 793a4dd7f624..5f6966cecd61 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -103,7 +103,7 @@ bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID, case PPC::Sched::IIC_LdStLHA: case PPC::Sched::IIC_LdStLHAU: case PPC::Sched::IIC_LdStLWA: - case PPC::Sched::IIC_LdStSTDU: + case PPC::Sched::IIC_LdStSTU: case PPC::Sched::IIC_LdStSTFDU: NSlots = 2; break; @@ -112,7 +112,7 @@ bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID, case PPC::Sched::IIC_LdStLHAUX: case PPC::Sched::IIC_LdStLWARX: case PPC::Sched::IIC_LdStLDARX: - case PPC::Sched::IIC_LdStSTDUX: + case PPC::Sched::IIC_LdStSTUX: case PPC::Sched::IIC_LdStSTDCX: case PPC::Sched::IIC_LdStSTWCX: case PPC::Sched::IIC_BrMCRX: // mtcr @@ -180,9 +180,8 @@ void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) { CurGroup.clear(); CurSlots = CurBranches = 0; } else { - LLVM_DEBUG(dbgs() << "**** Adding to dispatch group: SU(" << SU->NodeNum - << "): "); - LLVM_DEBUG(DAG->dumpNode(SU)); + LLVM_DEBUG(dbgs() << "**** Adding to dispatch group: "); + LLVM_DEBUG(DAG->dumpNode(*SU)); unsigned NSlots; bool MustBeFirst = mustComeFirst(MCID, NSlots); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 6cec664d1e66..31acd0ff870f 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -81,6 +81,8 @@ STATISTIC(NumLogicOpsOnComparison, "Number of logical ops on i1 
values calculated in GPR."); STATISTIC(OmittedForNonExtendUses, "Number of compares not eliminated as they have non-extending uses."); +STATISTIC(NumP9Setb, + "Number of compares lowered to setb."); // FIXME: Remove this once the bug has been fixed! cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug", @@ -327,7 +329,6 @@ private: bool isOffsetMultipleOf(SDNode *N, unsigned Val) const; void transferMemOperands(SDNode *N, SDNode *Result); - MachineSDNode *flipSignBit(const SDValue &N, SDNode **SignBit = nullptr); }; } // end anonymous namespace @@ -490,7 +491,7 @@ static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo, if (!FuncInfo->BPI) return PPC::BR_NO_HINT; const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); - const TerminatorInst *BBTerm = BB->getTerminator(); + const Instruction *BBTerm = BB->getTerminator(); if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT; @@ -687,9 +688,8 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { SDValue Op1 = N->getOperand(1); SDLoc dl(N); - KnownBits LKnown, RKnown; - CurDAG->computeKnownBits(Op0, LKnown); - CurDAG->computeKnownBits(Op1, RKnown); + KnownBits LKnown = CurDAG->computeKnownBits(Op0); + KnownBits RKnown = CurDAG->computeKnownBits(Op1); unsigned TargetMask = LKnown.Zero.getZExtValue(); unsigned InsertMask = RKnown.Zero.getZExtValue(); @@ -733,8 +733,7 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { // The AND mask might not be a constant, and we need to make sure that // if we're going to fold the masking with the insert, all bits not // know to be zero in the mask are known to be one. - KnownBits MKnown; - CurDAG->computeKnownBits(Op1.getOperand(1), MKnown); + KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1)); bool CanFoldMask = InsertMask == MKnown.One.getZExtValue(); unsigned SHOpc = Op1.getOperand(0).getOpcode(); @@ -1083,9 +1082,14 @@ class BitPermutationSelector { // lowest-order bit. unsigned Idx; + // ConstZero means a bit we need to mask off. 
+ // Variable is a bit comes from an input variable. + // VariableKnownToBeZero is also a bit comes from an input variable, + // but it is known to be already zero. So we do not need to mask them. enum Kind { ConstZero, - Variable + Variable, + VariableKnownToBeZero } K; ValueBit(SDValue V, unsigned I, Kind K = Variable) @@ -1094,11 +1098,11 @@ class BitPermutationSelector { : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {} bool isZero() const { - return K == ConstZero; + return K == ConstZero || K == VariableKnownToBeZero; } bool hasValue() const { - return K == Variable; + return K == Variable || K == VariableKnownToBeZero; } SDValue getValue() const { @@ -1248,8 +1252,14 @@ class BitPermutationSelector { for (unsigned i = 0; i < NumBits; ++i) if (((Mask >> i) & 1) == 1) Bits[i] = (*LHSBits)[i]; - else - Bits[i] = ValueBit(ValueBit::ConstZero); + else { + // AND instruction masks this bit. If the input is already zero, + // we have nothing to do here. Otherwise, make the bit ConstZero. + if ((*LHSBits)[i].isZero()) + Bits[i] = (*LHSBits)[i]; + else + Bits[i] = ValueBit(ValueBit::ConstZero); + } return std::make_pair(Interesting, &Bits); } @@ -1259,8 +1269,26 @@ class BitPermutationSelector { const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second; bool AllDisjoint = true; - for (unsigned i = 0; i < NumBits; ++i) - if (LHSBits[i].isZero()) + SDValue LastVal = SDValue(); + unsigned LastIdx = 0; + for (unsigned i = 0; i < NumBits; ++i) { + if (LHSBits[i].isZero() && RHSBits[i].isZero()) { + // If both inputs are known to be zero and one is ConstZero and + // another is VariableKnownToBeZero, we can select whichever + // we like. To minimize the number of bit groups, we select + // VariableKnownToBeZero if this bit is the next bit of the same + // input variable from the previous bit. Otherwise, we select + // ConstZero. 
+ if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal && + LHSBits[i].getValueBitIndex() == LastIdx + 1) + Bits[i] = LHSBits[i]; + else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal && + RHSBits[i].getValueBitIndex() == LastIdx + 1) + Bits[i] = RHSBits[i]; + else + Bits[i] = ValueBit(ValueBit::ConstZero); + } + else if (LHSBits[i].isZero()) Bits[i] = RHSBits[i]; else if (RHSBits[i].isZero()) Bits[i] = LHSBits[i]; @@ -1268,6 +1296,16 @@ class BitPermutationSelector { AllDisjoint = false; break; } + // We remember the value and bit index of this bit. + if (Bits[i].hasValue()) { + LastVal = Bits[i].getValue(); + LastIdx = Bits[i].getValueBitIndex(); + } + else { + if (LastVal) LastVal = SDValue(); + LastIdx = 0; + } + } if (!AllDisjoint) break; @@ -1293,6 +1331,72 @@ class BitPermutationSelector { return std::make_pair(Interesting, &Bits); } + case ISD::TRUNCATE: { + EVT FromType = V.getOperand(0).getValueType(); + EVT ToType = V.getValueType(); + // We support only the case with truncate from i64 to i32. + if (FromType != MVT::i64 || ToType != MVT::i32) + break; + const unsigned NumAllBits = FromType.getSizeInBits(); + SmallVector<ValueBit, 64> *InBits; + std::tie(Interesting, InBits) = getValueBits(V.getOperand(0), + NumAllBits); + const unsigned NumValidBits = ToType.getSizeInBits(); + + // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value. + // So, we cannot include this truncate. + bool UseUpper32bit = false; + for (unsigned i = 0; i < NumValidBits; ++i) + if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) { + UseUpper32bit = true; + break; + } + if (UseUpper32bit) + break; + + for (unsigned i = 0; i < NumValidBits; ++i) + Bits[i] = (*InBits)[i]; + + return std::make_pair(Interesting, &Bits); + } + case ISD::AssertZext: { + // For AssertZext, we look through the operand and + // mark the bits known to be zero. 
+ const SmallVector<ValueBit, 64> *LHSBits; + std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), + NumBits); + + EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT(); + const unsigned NumValidBits = FromType.getSizeInBits(); + for (unsigned i = 0; i < NumValidBits; ++i) + Bits[i] = (*LHSBits)[i]; + + // These bits are known to be zero. + for (unsigned i = NumValidBits; i < NumBits; ++i) + Bits[i] = ValueBit((*LHSBits)[i].getValue(), + (*LHSBits)[i].getValueBitIndex(), + ValueBit::VariableKnownToBeZero); + + return std::make_pair(Interesting, &Bits); + } + case ISD::LOAD: + LoadSDNode *LD = cast<LoadSDNode>(V); + if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) { + EVT VT = LD->getMemoryVT(); + const unsigned NumValidBits = VT.getSizeInBits(); + + for (unsigned i = 0; i < NumValidBits; ++i) + Bits[i] = ValueBit(V, i); + + // These bits are known to be zero. + for (unsigned i = NumValidBits; i < NumBits; ++i) + Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero); + + // Zero-extending load itself cannot be optimized. So, it is not + // interesting by itself though it gives useful information. + return std::make_pair(Interesting = false, &Bits); + } + break; } for (unsigned i = 0; i < NumBits; ++i) @@ -1304,7 +1408,7 @@ class BitPermutationSelector { // For each value (except the constant ones), compute the left-rotate amount // to get it from its original to final position. void computeRotationAmounts() { - HasZeros = false; + NeedMask = false; RLAmt.resize(Bits.size()); for (unsigned i = 0; i < Bits.size(); ++i) if (Bits[i].hasValue()) { @@ -1314,7 +1418,7 @@ class BitPermutationSelector { else RLAmt[i] = Bits.size() - (VBI - i); } else if (Bits[i].isZero()) { - HasZeros = true; + NeedMask = true; RLAmt[i] = UINT32_MAX; } else { llvm_unreachable("Unknown value bit type"); @@ -1330,6 +1434,7 @@ class BitPermutationSelector { unsigned LastRLAmt = RLAmt[0]; SDValue LastValue = Bits[0].hasValue() ? 
Bits[0].getValue() : SDValue(); unsigned LastGroupStartIdx = 0; + bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue(); for (unsigned i = 1; i < Bits.size(); ++i) { unsigned ThisRLAmt = RLAmt[i]; SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue(); @@ -1342,10 +1447,20 @@ class BitPermutationSelector { LastGroupStartIdx = 0; } + // If this bit is known to be zero and the current group is a bit group + // of zeros, we do not need to terminate the current bit group even the + // Value or RLAmt does not match here. Instead, we terminate this group + // when the first non-zero bit appears later. + if (IsGroupOfZeros && Bits[i].isZero()) + continue; + // If this bit has the same underlying value and the same rotate factor as // the last one, then they're part of the same group. if (ThisRLAmt == LastRLAmt && ThisValue == LastValue) - continue; + // We cannot continue the current group if this bits is not known to + // be zero in a bit group of zeros. + if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero())) + continue; if (LastValue.getNode()) BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, @@ -1353,6 +1468,7 @@ class BitPermutationSelector { LastRLAmt = ThisRLAmt; LastValue = ThisValue; LastGroupStartIdx = i; + IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue(); } if (LastValue.getNode()) BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, @@ -1401,7 +1517,7 @@ class BitPermutationSelector { for (auto &I : ValueRots) { ValueRotsVec.push_back(I.second); } - llvm::sort(ValueRotsVec.begin(), ValueRotsVec.end()); + llvm::sort(ValueRotsVec); } // In 64-bit mode, rlwinm and friends have a rotation operator that @@ -1588,6 +1704,17 @@ class BitPermutationSelector { return ExtVal; } + SDValue TruncateToInt32(SDValue V, const SDLoc &dl) { + if (V.getValueSizeInBits() == 32) + return V; + + assert(V.getValueSizeInBits() == 64); + SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); + SDValue 
SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, + MVT::i32, V, SubRegIdx), 0); + return SubVal; + } + // Depending on the number of groups for a particular value, it might be // better to rotate, mask explicitly (using andi/andis), and then or the // result. Select this part of the result first. @@ -1646,12 +1773,12 @@ class BitPermutationSelector { SDValue VRot; if (VRI.RLAmt) { SDValue Ops[] = - { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl), - getI32Imm(31, dl) }; + { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), + getI32Imm(0, dl), getI32Imm(31, dl) }; VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); } else { - VRot = VRI.V; + VRot = TruncateToInt32(VRI.V, dl); } SDValue ANDIVal, ANDISVal; @@ -1698,17 +1825,17 @@ class BitPermutationSelector { // If we've not yet selected a 'starting' instruction, and we have no zeros // to fill in, select the (Value, RLAmt) with the highest priority (largest // number of groups), and start with this rotated value. - if ((!HasZeros || LateMask) && !Res) { + if ((!NeedMask || LateMask) && !Res) { ValueRotInfo &VRI = ValueRotsVec[0]; if (VRI.RLAmt) { if (InstCnt) *InstCnt += 1; SDValue Ops[] = - { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl), - getI32Imm(31, dl) }; + { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), + getI32Imm(0, dl), getI32Imm(31, dl) }; Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); } else { - Res = VRI.V; + Res = TruncateToInt32(VRI.V, dl); } // Now, remove all groups with this underlying value and rotation factor. 
@@ -1723,13 +1850,13 @@ class BitPermutationSelector { for (auto &BG : BitGroups) { if (!Res) { SDValue Ops[] = - { BG.V, getI32Imm(BG.RLAmt, dl), + { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl), getI32Imm(Bits.size() - BG.EndIdx - 1, dl), getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); } else { SDValue Ops[] = - { Res, BG.V, getI32Imm(BG.RLAmt, dl), + { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl), getI32Imm(Bits.size() - BG.EndIdx - 1, dl), getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0); @@ -2077,7 +2204,7 @@ class BitPermutationSelector { // If we've not yet selected a 'starting' instruction, and we have no zeros // to fill in, select the (Value, RLAmt) with the highest priority (largest // number of groups), and start with this rotated value. - if ((!HasZeros || LateMask) && !Res) { + if ((!NeedMask || LateMask) && !Res) { // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32 // groups will come first, and so the VRI representing the largest number // of groups might not be first (it might be the first Repl32 groups). @@ -2230,7 +2357,7 @@ class BitPermutationSelector { SmallVector<ValueBit, 64> Bits; - bool HasZeros; + bool NeedMask; SmallVector<unsigned, 64> RLAmt; SmallVector<BitGroup, 16> BitGroups; @@ -2259,10 +2386,10 @@ public: " selection for: "); LLVM_DEBUG(N->dump(CurDAG)); - // Fill it RLAmt and set HasZeros. + // Fill it RLAmt and set NeedMask. computeRotationAmounts(); - if (!HasZeros) + if (!NeedMask) return Select(N, false); // We currently have two techniques for handling results with zeros: early @@ -4045,54 +4172,148 @@ bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { // Transfer memoperands. 
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); - cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); + MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); + CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); } -/// This method returns a node after flipping the MSB of each element -/// of vector integer type. Additionally, if SignBitVec is non-null, -/// this method sets a node with one at MSB of all elements -/// and zero at other bits in SignBitVec. -MachineSDNode * -PPCDAGToDAGISel::flipSignBit(const SDValue &N, SDNode **SignBitVec) { - SDLoc dl(N); - EVT VecVT = N.getValueType(); - if (VecVT == MVT::v4i32) { - if (SignBitVec) { - SDNode *ZV = CurDAG->getMachineNode(PPC::V_SET0, dl, MVT::v4i32); - *SignBitVec = CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT, - SDValue(ZV, 0)); - } - return CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT, N); - } - else if (VecVT == MVT::v8i16) { - SDNode *Hi = CurDAG->getMachineNode(PPC::LIS, dl, MVT::i32, - getI32Imm(0x8000, dl)); - SDNode *ScaImm = CurDAG->getMachineNode(PPC::ORI, dl, MVT::i32, - SDValue(Hi, 0), - getI32Imm(0x8000, dl)); - SDNode *VecImm = CurDAG->getMachineNode(PPC::MTVSRWS, dl, VecVT, - SDValue(ScaImm, 0)); - /* - Alternatively, we can do this as follow to use VRF instead of GPR. 
- vspltish 5, 1 - vspltish 6, 15 - vslh 5, 6, 5 - */ - if (SignBitVec) *SignBitVec = VecImm; - return CurDAG->getMachineNode(PPC::VADDUHM, dl, VecVT, N, - SDValue(VecImm, 0)); - } - else if (VecVT == MVT::v16i8) { - SDNode *VecImm = CurDAG->getMachineNode(PPC::XXSPLTIB, dl, MVT::i32, - getI32Imm(0x80, dl)); - if (SignBitVec) *SignBitVec = VecImm; - return CurDAG->getMachineNode(PPC::VADDUBM, dl, VecVT, N, - SDValue(VecImm, 0)); +static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, + bool &NeedSwapOps, bool &IsUnCmp) { + + assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here."); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue TrueRes = N->getOperand(2); + SDValue FalseRes = N->getOperand(3); + ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes); + if (!TrueConst) + return false; + + assert((N->getSimpleValueType(0) == MVT::i64 || + N->getSimpleValueType(0) == MVT::i32) && + "Expecting either i64 or i32 here."); + + // We are looking for any of: + // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1) + // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1) + // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq) + // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq) + int64_t TrueResVal = TrueConst->getSExtValue(); + if ((TrueResVal < -1 || TrueResVal > 1) || + (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) || + (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) || + (TrueResVal == 0 && + (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ))) + return false; + + bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC; + SDValue SetOrSelCC = InnerIsSel ? 
FalseRes : FalseRes.getOperand(0); + if (SetOrSelCC.getOpcode() != ISD::SETCC && + SetOrSelCC.getOpcode() != ISD::SELECT_CC) + return false; + + // Without this setb optimization, the outer SELECT_CC will be manually + // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass + // transforms pseduo instruction to isel instruction. When there are more than + // one use for result like zext/sext, with current optimization we only see + // isel is replaced by setb but can't see any significant gain. Since + // setb has longer latency than original isel, we should avoid this. Another + // point is that setb requires comparison always kept, it can break the + // oppotunity to get the comparison away if we have in future. + if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse())) + return false; + + SDValue InnerLHS = SetOrSelCC.getOperand(0); + SDValue InnerRHS = SetOrSelCC.getOperand(1); + ISD::CondCode InnerCC = + cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get(); + // If the inner comparison is a select_cc, make sure the true/false values are + // 1/-1 and canonicalize it if needed. + if (InnerIsSel) { + ConstantSDNode *SelCCTrueConst = + dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2)); + ConstantSDNode *SelCCFalseConst = + dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3)); + if (!SelCCTrueConst || !SelCCFalseConst) + return false; + int64_t SelCCTVal = SelCCTrueConst->getSExtValue(); + int64_t SelCCFVal = SelCCFalseConst->getSExtValue(); + // The values must be -1/1 (requiring a swap) or 1/-1. + if (SelCCTVal == -1 && SelCCFVal == 1) { + std::swap(InnerLHS, InnerRHS); + } else if (SelCCTVal != 1 || SelCCFVal != -1) + return false; } - else - llvm_unreachable("Unsupported vector data type for flipSignBit"); + + // Canonicalize unsigned case + if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) { + IsUnCmp = true; + InnerCC = (InnerCC == ISD::SETULT) ? 
ISD::SETLT : ISD::SETGT; + } + + bool InnerSwapped = false; + if (LHS == InnerRHS && RHS == InnerLHS) + InnerSwapped = true; + else if (LHS != InnerLHS || RHS != InnerRHS) + return false; + + switch (CC) { + // (select_cc lhs, rhs, 0, \ + // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq) + case ISD::SETEQ: + if (!InnerIsSel) + return false; + if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT) + return false; + NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped; + break; + + // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt) + // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt) + // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt) + // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt) + // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt) + // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt) + case ISD::SETULT: + if (!IsUnCmp && InnerCC != ISD::SETNE) + return false; + IsUnCmp = true; + LLVM_FALLTHROUGH; + case ISD::SETLT: + if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) || + (InnerCC == ISD::SETLT && InnerSwapped)) + NeedSwapOps = (TrueResVal == 1); + else + return false; + break; + + // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt) + // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt) + // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt) + // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt) + // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt) + // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt) + case ISD::SETUGT: + if (!IsUnCmp && InnerCC != ISD::SETNE) + return false; + IsUnCmp = true; + LLVM_FALLTHROUGH; + case ISD::SETGT: + if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) || + (InnerCC == ISD::SETGT && InnerSwapped)) + NeedSwapOps = (TrueResVal == -1); + else + return 
false; + break; + + default: + return false; + } + + LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: "); + LLVM_DEBUG(N->dump()); + + return true; } // Select - Convert the specified operand from a target-independent to a @@ -4429,8 +4650,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { int16_t Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { - KnownBits LHSKnown; - CurDAG->computeKnownBits(N->getOperand(0), LHSKnown); + KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0)); // If this is equivalent to an add, then we can fold it with the // FrameIndex calculation. @@ -4557,6 +4777,31 @@ void PPCDAGToDAGISel::Select(SDNode *N) { N->getOperand(0).getValueType() == MVT::i1) break; + if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) { + bool NeedSwapOps = false; + bool IsUnCmp = false; + if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (NeedSwapOps) + std::swap(LHS, RHS); + + // Make use of SelectCC to generate the comparison to set CR bits, for + // equality comparisons having one literal operand, SelectCC probably + // doesn't need to materialize the whole literal and just use xoris to + // check it first, it leads the following comparison result can't + // exactly represent GT/LT relationship. So to avoid this we specify + // SETGT/SETUGT here instead of SETEQ. + SDValue GenCC = + SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl); + CurDAG->SelectNodeTo( + N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB, + N->getValueType(0), GenCC); + NumP9Setb++; + return; + } + } + // Handle the setcc cases here. 
select_cc lhs, 0, 1, 0, cc if (!isPPC64) if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1))) @@ -4648,14 +4893,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) { CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops); return; } - case ISD::VSELECT: - if (PPCSubTarget->hasVSX()) { - SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) }; - CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops); - return; - } - break; - case ISD::VECTOR_SHUFFLE: if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || N->getValueType(0) == MVT::v2i64)) { @@ -4683,11 +4920,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { SDValue Chain = LD->getChain(); SDValue Ops[] = { Base, Offset, Chain }; - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = LD->getMemOperand(); + MachineMemOperand *MemOp = LD->getMemOperand(); SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, N->getValueType(0), Ops); - cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1); + CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp}); return; } } @@ -4753,6 +4989,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) { case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break; } + // A signed comparison of i1 values produces the opposite result to an + // unsigned one if the condition code includes less-than or greater-than. + // This is because 1 is the most negative signed i1 number and the most + // positive unsigned i1 number. The CR-logical operations used for such + // comparisons are non-commutative so for signed comparisons vs. unsigned + // ones, the input operands just need to be swapped. + if (ISD::isSignedIntSetCC(CC)) + Swap = !Swap; + SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1, N->getOperand(Swap ? 3 : 2), N->getOperand(Swap ? 
2 : 3)), 0); @@ -4809,9 +5054,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDValue TOCbase = N->getOperand(1); SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, TOCbase, GA); - - if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) || - CModel == CodeModel::Large) { + if (PPCLowering->isAccessedAsGotIndirect(GA)) { + // If it is access as got-indirect, we need an extra LD to load + // the address. SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, SDValue(Tmp, 0)); transferMemOperands(N, MN); @@ -4819,18 +5064,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { return; } - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { - const GlobalValue *GV = G->getGlobal(); - unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV); - if (GVFlags & PPCII::MO_NLP_FLAG) { - SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, - SDValue(Tmp, 0)); - transferMemOperands(N, MN); - ReplaceNode(N, MN); - return; - } - } - + // Build the address relative to the TOC-pointer.. ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, SDValue(Tmp, 0), GA)); return; @@ -4916,55 +5150,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) { return; } } - case ISD::ABS: { - assert(PPCSubTarget->hasP9Vector() && "ABS is supported with P9 Vector"); - - // For vector absolute difference, we use VABSDUW instruction of POWER9. - // Since VABSDU instructions are for unsigned integers, we need adjustment - // for signed integers. - // For abs(sub(a, b)), we generate VABSDUW(a+0x80000000, b+0x80000000). - // Otherwise, abs(sub(-1, 0)) returns 0xFFFFFFFF(=-1) instead of 1. - // For abs(a), we generate VABSDUW(a+0x80000000, 0x80000000). 
- EVT VecVT = N->getOperand(0).getValueType(); - SDNode *AbsOp = nullptr; - unsigned AbsOpcode; - - if (VecVT == MVT::v4i32) - AbsOpcode = PPC::VABSDUW; - else if (VecVT == MVT::v8i16) - AbsOpcode = PPC::VABSDUH; - else if (VecVT == MVT::v16i8) - AbsOpcode = PPC::VABSDUB; - else - llvm_unreachable("Unsupported vector data type for ISD::ABS"); - - // Even for signed integers, we can skip adjustment if all values are - // known to be positive (as signed integer) due to zero-extended inputs. - if (N->getOperand(0).getOpcode() == ISD::SUB && - N->getOperand(0)->getOperand(0).getOpcode() == ISD::ZERO_EXTEND && - N->getOperand(0)->getOperand(1).getOpcode() == ISD::ZERO_EXTEND) { - AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, - SDValue(N->getOperand(0)->getOperand(0)), - SDValue(N->getOperand(0)->getOperand(1))); - ReplaceNode(N, AbsOp); - return; - } - if (N->getOperand(0).getOpcode() == ISD::SUB) { - SDValue SubVal = N->getOperand(0); - SDNode *Op0 = flipSignBit(SubVal->getOperand(0)); - SDNode *Op1 = flipSignBit(SubVal->getOperand(1)); - AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, - SDValue(Op0, 0), SDValue(Op1, 0)); - } - else { - SDNode *Op1 = nullptr; - SDNode *Op0 = flipSignBit(N->getOperand(0), &Op1); - AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, SDValue(Op0, 0), - SDValue(Op1, 0)); - } - ReplaceNode(N, AbsOp); - return; - } } SelectCode(N); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index b5bdf47ce37a..39608cb74bee 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -251,12 +251,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::UREM, MVT::i64, Expand); } - if (Subtarget.hasP9Vector()) { - setOperationAction(ISD::ABS, MVT::v4i32, Legal); - setOperationAction(ISD::ABS, MVT::v8i16, Legal); - setOperationAction(ISD::ABS, MVT::v16i8, Legal); - } - // 
Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); @@ -323,12 +317,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // to speed up scalar BSWAP64. // CTPOP or CTTZ were introduced in P8/P9 respectively setOperationAction(ISD::BSWAP, MVT::i32 , Expand); - if (Subtarget.isISA3_0()) { + if (Subtarget.hasP9Vector()) setOperationAction(ISD::BSWAP, MVT::i64 , Custom); + else + setOperationAction(ISD::BSWAP, MVT::i64 , Expand); + if (Subtarget.isISA3_0()) { setOperationAction(ISD::CTTZ , MVT::i32 , Legal); setOperationAction(ISD::CTTZ , MVT::i64 , Legal); } else { - setOperationAction(ISD::BSWAP, MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Expand); } @@ -554,6 +550,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // add/sub are legal for all supported vector VT's. 
setOperationAction(ISD::ADD, VT, Legal); setOperationAction(ISD::SUB, VT, Legal); + setOperationAction(ISD::ABS, VT, Custom); // Vector instructions introduced in P8 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { @@ -586,6 +583,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); setOperationAction(ISD::SELECT, VT, Promote); AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); + setOperationAction(ISD::VSELECT, VT, Legal); setOperationAction(ISD::SELECT_CC, VT, Promote); AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32); setOperationAction(ISD::STORE, VT, Promote); @@ -626,7 +624,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::BSWAP, VT, Expand); - setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); @@ -659,6 +656,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); + // Without hasP8Altivec set, v2i64 SMAX isn't available. + // But ABS custom lowering requires SMAX support. 
+ if (!Subtarget.hasP8Altivec()) + setOperationAction(ISD::ABS, MVT::v2i64, Expand); + addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); @@ -727,12 +729,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FDIV, MVT::v2f64, Legal); setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); - setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); - setOperationAction(ISD::VSELECT, MVT::v8i16, Legal); - setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); - setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); - setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); - // Share the Altivec comparison restrictions. setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); @@ -792,12 +788,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); - // Vector operation legalization checks the result type of - // SIGN_EXTEND_INREG, overall legalization checks the inner type. - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); + // Custom handling for partial vectors of integers converted to + // floating point. We already have optimal handling for v2i32 through + // the DAG combine, so those aren't necessary. 
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::FNEG, MVT::v4f32, Legal); setOperationAction(ISD::FNEG, MVT::v2f64, Legal); @@ -1055,6 +1056,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1); // We have target-specific dag combine patterns for the following nodes: + setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); @@ -1076,6 +1078,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); + setTargetDAGCombine(ISD::TRUNCATE); + if (Subtarget.useCRBits()) { setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::SETCC); @@ -1088,6 +1092,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setTargetDAGCombine(ISD::FSQRT); } + if (Subtarget.hasP9Altivec()) { + setTargetDAGCombine(ISD::ABS); + setTargetDAGCombine(ISD::VSELECT); + } + // Darwin long double math library functions have $LDBL128 appended. 
if (Subtarget.isDarwin()) { setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); @@ -1348,6 +1357,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::RFEBB: return "PPCISD::RFEBB"; case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; + case PPCISD::VABSD: return "PPCISD::VABSD"; case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; @@ -1355,6 +1365,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::QBFLT: return "PPCISD::QBFLT"; case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; + case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; } return nullptr; } @@ -2214,11 +2225,10 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are provably // disjoint. - KnownBits LHSKnown, RHSKnown; - DAG.computeKnownBits(N.getOperand(0), LHSKnown); + KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0)); if (LHSKnown.Zero.getBoolValue()) { - DAG.computeKnownBits(N.getOperand(1), RHSKnown); + KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1)); // If all of the bits are known zero on the LHS or RHS, the add won't // carry. if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) { @@ -2317,8 +2327,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. 
- KnownBits LHSKnown; - DAG.computeKnownBits(N.getOperand(0), LHSKnown); + KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0)); if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't @@ -2405,6 +2414,28 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, return true; } +/// Returns true if we should use a direct load into vector instruction +/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence. +static bool usePartialVectorLoads(SDNode *N) { + if (!N->hasOneUse()) + return false; + + // If there are any other uses other than scalar to vector, then we should + // keep it as a scalar load -> direct move pattern to prevent multiple + // loads. Currently, only check for i64 since we have lxsd/lfd to do this + // efficiently, but no update equivalent. + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + EVT MemVT = LD->getMemoryVT(); + if (MemVT.isSimple() && MemVT.getSimpleVT().SimpleTy == MVT::i64) { + SDNode *User = *(LD->use_begin()); + if (User->getOpcode() == ISD::SCALAR_TO_VECTOR) + return true; + } + } + + return false; +} + /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. @@ -2430,6 +2461,13 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, } else return false; + // Do not generate pre-inc forms for specific loads that feed scalar_to_vector + // instructions because we can fold these into a more efficient instruction + // instead, (such as LXSD). + if (isLoad && usePartialVectorLoads(N)) { + return false; + } + // PowerPC doesn't have preinc load/store instructions for vectors (except // for QPX, which does have preinc r+r forms). 
if (VT.isVector()) { @@ -2674,7 +2712,8 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, // 64-bit SVR4 ABI code is always position-independent. // The actual BlockAddress is stored in the TOC. - if (Subtarget.isSVR4ABI() && isPositionIndependent()) { + if (Subtarget.isSVR4ABI() && + (Subtarget.isPPC64() || isPositionIndependent())) { if (Subtarget.isPPC64()) setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); @@ -3480,9 +3519,14 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( // Argument stored in memory. assert(VA.isMemLoc()); + // Get the extended size of the argument type in stack unsigned ArgSize = VA.getLocVT().getStoreSize(); - int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(), - isImmutable); + // Get the actual size of the argument type + unsigned ObjSize = VA.getValVT().getStoreSize(); + unsigned ArgOffset = VA.getLocMemOffset(); + // Stack objects in PPC32 are right justified. + ArgOffset += ArgSize - ObjSize; + int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); @@ -3935,7 +3979,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 && "Invalid QPX parameter type"); - /* fall through */ + LLVM_FALLTHROUGH; case MVT::v4f64: case MVT::v4i1: @@ -5053,9 +5097,15 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live // into the call. - if (isSVR4ABI && isPPC64 && !isPatchPoint) { + // We do need to reserve X2 to appease the verifier for the PATCHPOINT. + if (isSVR4ABI && isPPC64) { setUsesTOCBasePtr(DAG); - Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); + + // We cannot add X2 as an operand here for PATCHPOINT, because there is no + // way to mark dependencies as implicit here. 
We will add the X2 dependency + // in EmitInstrWithCustomInserter. + if (!isPatchPoint) + Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); } return CallOpc; @@ -5437,10 +5487,15 @@ SDValue PPCTargetLowering::LowerCall_32SVR4( Arg = PtrOff; } - if (VA.isRegLoc()) { - if (Arg.getValueType() == MVT::i1) - Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg); + // When useCRBits() is true, there can be i1 arguments. + // It is because getRegisterType(MVT::i1) => MVT::i1, + // and for other integer types getRegisterType() => MVT::i32. + // Extend i1 and ensure callee will get i32. + if (Arg.getValueType() == MVT::i1) + Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + dl, MVT::i32, Arg); + if (VA.isRegLoc()) { seenFloatArg |= VA.getLocVT().isFloatingPoint(); // Put argument in a physical register. RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); @@ -6073,7 +6128,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 && "Invalid QPX parameter type"); - /* fall through */ + LLVM_FALLTHROUGH; case MVT::v4f64: case MVT::v4i1: { bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32; @@ -7228,10 +7283,83 @@ SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, return FP; } +static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) { + + EVT VecVT = Vec.getValueType(); + assert(VecVT.isVector() && "Expected a vector type."); + assert(VecVT.getSizeInBits() < 128 && "Vector is already full width."); + + EVT EltVT = VecVT.getVectorElementType(); + unsigned WideNumElts = 128 / EltVT.getSizeInBits(); + EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts); + + unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(NumConcat); + Ops[0] = Vec; + SDValue UndefVec = DAG.getUNDEF(VecVT); + for (unsigned i = 1; i < NumConcat; ++i) + Ops[i] = UndefVec; + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, 
WideVT, Ops); +} + +SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG, + const SDLoc &dl) const { + + unsigned Opc = Op.getOpcode(); + assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) && + "Unexpected conversion type"); + assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) && + "Supports conversions to v2f64/v4f32 only."); + + bool SignedConv = Opc == ISD::SINT_TO_FP; + bool FourEltRes = Op.getValueType() == MVT::v4f32; + + SDValue Wide = widenVec(DAG, Op.getOperand(0), dl); + EVT WideVT = Wide.getValueType(); + unsigned WideNumElts = WideVT.getVectorNumElements(); + MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64; + + SmallVector<int, 16> ShuffV; + for (unsigned i = 0; i < WideNumElts; ++i) + ShuffV.push_back(i + WideNumElts); + + int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2; + int SaveElts = FourEltRes ? 4 : 2; + if (Subtarget.isLittleEndian()) + for (int i = 0; i < SaveElts; i++) + ShuffV[i * Stride] = i; + else + for (int i = 1; i <= SaveElts; i++) + ShuffV[i * Stride - 1] = i - 1; + + SDValue ShuffleSrc2 = + SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT); + SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV); + unsigned ExtendOp = + SignedConv ? 
(unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST; + + SDValue Extend; + if (!Subtarget.hasP9Altivec() && SignedConv) { + Arrange = DAG.getBitcast(IntermediateVT, Arrange); + Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange, + DAG.getValueType(Op.getOperand(0).getValueType())); + } else + Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange); + + return DAG.getNode(Opc, dl, Op.getValueType(), Extend); +} + SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); + EVT InVT = Op.getOperand(0).getValueType(); + EVT OutVT = Op.getValueType(); + if (OutVT.isVector() && OutVT.isFloatingPoint() && + isOperationCustom(Op.getOpcode(), InVT)) + return LowerINT_TO_FPVector(Op, DAG, dl); + // Conversions to f128 are legal. if (EnableQuadPrecision && (Op.getValueType() == MVT::f128)) return Op; @@ -8902,35 +9030,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getRegister(PPC::R2, MVT::i32); } - // We are looking for absolute values here. 
- // The idea is to try to fit one of two patterns: - // max (a, (0-a)) OR max ((0-a), a) - if (Subtarget.hasP9Vector() && - (IntrinsicID == Intrinsic::ppc_altivec_vmaxsw || - IntrinsicID == Intrinsic::ppc_altivec_vmaxsh || - IntrinsicID == Intrinsic::ppc_altivec_vmaxsb)) { - SDValue V1 = Op.getOperand(1); - SDValue V2 = Op.getOperand(2); - if (V1.getSimpleValueType() == V2.getSimpleValueType() && - (V1.getSimpleValueType() == MVT::v4i32 || - V1.getSimpleValueType() == MVT::v8i16 || - V1.getSimpleValueType() == MVT::v16i8)) { - if ( V1.getOpcode() == ISD::SUB && - ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) && - V1.getOperand(1) == V2 ) { - // Generate the abs instruction with the operands - return DAG.getNode(ISD::ABS, dl, V2.getValueType(),V2); - } - - if ( V2.getOpcode() == ISD::SUB && - ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) && - V2.getOperand(1) == V1 ) { - // Generate the abs instruction with the operands - return DAG.getNode(ISD::ABS, dl, V1.getValueType(),V1); - } - } - } - // If this is a lowered altivec predicate compare, CompareOpc is set to the // opcode number of the comparison. int CompareOpc; @@ -9081,30 +9180,6 @@ SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO); } -SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, - SelectionDAG &DAG) const { - SDLoc dl(Op); - // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int - // instructions), but for smaller types, we need to first extend up to v2i32 - // before doing going farther. 
- if (Op.getValueType() == MVT::v2i64) { - EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - if (ExtVT != MVT::v2i32) { - Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)); - Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op, - DAG.getValueType(EVT::getVectorVT(*DAG.getContext(), - ExtVT.getVectorElementType(), 4))); - Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op); - Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op, - DAG.getValueType(MVT::v2i32)); - } - - return Op; - } - - return SDValue(); -} - SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -9495,6 +9570,44 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { } } +SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { + + assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS"); + + EVT VT = Op.getValueType(); + assert(VT.isVector() && + "Only set vector abs as custom, scalar abs shouldn't reach here!"); + assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 || + VT == MVT::v16i8) && + "Unexpected vector element type!"); + assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) && + "Current subtarget doesn't support smax v2i64!"); + + // For vector abs, it can be lowered to: + // abs x + // ==> + // y = -x + // smax(x, y) + + SDLoc dl(Op); + SDValue X = Op.getOperand(0); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X); + + // SMAX patch https://reviews.llvm.org/D47332 + // hasn't landed yet, so use intrinsic first here. 
+ // TODO: Should use SMAX directly once SMAX patch landed + Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw; + if (VT == MVT::v2i64) + BifID = Intrinsic::ppc_altivec_vmaxsd; + else if (VT == MVT::v8i16) + BifID = Intrinsic::ppc_altivec_vmaxsh; + else if (VT == MVT::v16i8) + BifID = Intrinsic::ppc_altivec_vmaxsb; + + return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT); +} + /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -9544,10 +9657,10 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); - case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); + case ISD::ABS: return LowerABS(Op, DAG); // For counter-based loop handling. case ISD::INTRINSIC_W_CHAIN: return SDValue(); @@ -9624,6 +9737,9 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); return; + case ISD::BITCAST: + // Don't handle bitcast here. 
+ return; } } @@ -9787,17 +9903,14 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, return BB; } -MachineBasicBlock * -PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, - MachineBasicBlock *BB, - bool is8bit, // operation - unsigned BinOpcode, - unsigned CmpOpcode, - unsigned CmpPred) const { +MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( + MachineInstr &MI, MachineBasicBlock *BB, + bool is8bit, // operation + unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const { // If we support part-word atomic mnemonics, just use them if (Subtarget.hasPartwordAtomics()) - return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, - CmpOpcode, CmpPred); + return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode, + CmpPred); // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); @@ -9821,7 +9934,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = - CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr; + CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr; MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); if (CmpOpcode) @@ -9832,22 +9945,25 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); - const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass - : &PPC::GPRCRegClass; + const TargetRegisterClass *RC = + is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + unsigned PtrReg = RegInfo.createVirtualRegister(RC); - unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); + unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC); unsigned ShiftReg = - isLittleEndian ? 
Shift1Reg : RegInfo.createVirtualRegister(RC); - unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); - unsigned MaskReg = RegInfo.createVirtualRegister(RC); - unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); - unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); - unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); - unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); - unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); - unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); + isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC); + unsigned Incr2Reg = RegInfo.createVirtualRegister(GPRC); + unsigned MaskReg = RegInfo.createVirtualRegister(GPRC); + unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC); + unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC); + unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC); + unsigned Tmp3Reg = RegInfo.createVirtualRegister(GPRC); + unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC); + unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC); unsigned Ptr1Reg; - unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); + unsigned TmpReg = + (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC); // thisMBB: // ... @@ -9876,82 +9992,107 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, if (ptrA != ZeroReg) { Ptr1Reg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) - .addReg(ptrA).addReg(ptrB); + .addReg(ptrA) + .addReg(ptrB); } else { Ptr1Reg = ptrB; } - BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) - .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); + // We need use 32-bit subregister to avoid mismatch register class in 64-bit + // mode. + BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg) + .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0) + .addImm(3) + .addImm(27) + .addImm(is8bit ? 28 : 27); if (!isLittleEndian) - BuildMI(BB, dl, TII->get(is64bit ? 
PPC::XORI8 : PPC::XORI), ShiftReg) - .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); + BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg) + .addReg(Shift1Reg) + .addImm(is8bit ? 24 : 16); if (is64bit) BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) - .addReg(Ptr1Reg).addImm(0).addImm(61); + .addReg(Ptr1Reg) + .addImm(0) + .addImm(61); else BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) - .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); - BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) - .addReg(incr).addReg(ShiftReg); + .addReg(Ptr1Reg) + .addImm(0) + .addImm(0) + .addImm(29); + BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg); if (is8bit) BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); else { BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); - BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); + BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) + .addReg(Mask3Reg) + .addImm(65535); } BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) - .addReg(Mask2Reg).addReg(ShiftReg); + .addReg(Mask2Reg) + .addReg(ShiftReg); BB = loopMBB; BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) - .addReg(ZeroReg).addReg(PtrReg); + .addReg(ZeroReg) + .addReg(PtrReg); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) - .addReg(Incr2Reg).addReg(TmpDestReg); - BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) - .addReg(TmpDestReg).addReg(MaskReg); - BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) - .addReg(TmpReg).addReg(MaskReg); + .addReg(Incr2Reg) + .addReg(TmpDestReg); + BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg) + .addReg(TmpDestReg) + .addReg(MaskReg); + BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg); if (CmpOpcode) { // For unsigned comparisons, we can directly compare the shifted values. // For signed comparisons we shift and sign extend. - unsigned SReg = RegInfo.createVirtualRegister(RC); - BuildMI(BB, dl, TII->get(is64bit ? 
PPC::AND8 : PPC::AND), SReg) - .addReg(TmpDestReg).addReg(MaskReg); + unsigned SReg = RegInfo.createVirtualRegister(GPRC); + BuildMI(BB, dl, TII->get(PPC::AND), SReg) + .addReg(TmpDestReg) + .addReg(MaskReg); unsigned ValueReg = SReg; unsigned CmpReg = Incr2Reg; if (CmpOpcode == PPC::CMPW) { - ValueReg = RegInfo.createVirtualRegister(RC); + ValueReg = RegInfo.createVirtualRegister(GPRC); BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg) - .addReg(SReg).addReg(ShiftReg); - unsigned ValueSReg = RegInfo.createVirtualRegister(RC); + .addReg(SReg) + .addReg(ShiftReg); + unsigned ValueSReg = RegInfo.createVirtualRegister(GPRC); BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg) - .addReg(ValueReg); + .addReg(ValueReg); ValueReg = ValueSReg; CmpReg = incr; } BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) - .addReg(CmpReg).addReg(ValueReg); + .addReg(CmpReg) + .addReg(ValueReg); BuildMI(BB, dl, TII->get(PPC::BCC)) - .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); + .addImm(CmpPred) + .addReg(PPC::CR0) + .addMBB(exitMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(exitMBB); BB = loop2MBB; } - BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) - .addReg(Tmp3Reg).addReg(Tmp2Reg); + BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg); BuildMI(BB, dl, TII->get(PPC::STWCX)) - .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); + .addReg(Tmp4Reg) + .addReg(ZeroReg) + .addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) - .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); + .addImm(PPC::PRED_NE) + .addReg(PPC::CR0) + .addMBB(loopMBB); BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); // exitMBB: // ... 
BB = exitMBB; - BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) - .addReg(ShiftReg); + BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest) + .addReg(TmpDestReg) + .addReg(ShiftReg); return BB; } @@ -9968,10 +10109,6 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, const BasicBlock *BB = MBB->getBasicBlock(); MachineFunction::iterator I = ++MBB->getIterator(); - // Memory Reference - MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); - MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); - unsigned DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); @@ -10034,10 +10171,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { setUsesTOCBasePtr(*MBB->getParent()); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) - .addReg(PPC::X2) - .addImm(TOCOffset) - .addReg(BufReg); - MIB.setMemRefs(MMOBegin, MMOEnd); + .addReg(PPC::X2) + .addImm(TOCOffset) + .addReg(BufReg) + .cloneMemRefs(MI); } // Naked functions never have a base pointer, and so we use r1. For all @@ -10052,8 +10189,8 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, TII->get(Subtarget.isPPC64() ? 
PPC::STD : PPC::STW)) .addReg(BaseReg) .addImm(BPOffset) - .addReg(BufReg); - MIB.setMemRefs(MMOBegin, MMOEnd); + .addReg(BufReg) + .cloneMemRefs(MI); // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); @@ -10086,8 +10223,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, .addImm(LabelOffset) .addReg(BufReg); } - - MIB.setMemRefs(MMOBegin, MMOEnd); + MIB.cloneMemRefs(MI); BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); mainMBB->addSuccessor(sinkMBB); @@ -10111,10 +10247,6 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - // Memory Reference - MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); - MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); - MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); @@ -10152,7 +10284,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, .addImm(0) .addReg(BufReg); } - MIB.setMemRefs(MMOBegin, MMOEnd); + MIB.cloneMemRefs(MI); // Reload IP if (PVT == MVT::i64) { @@ -10164,7 +10296,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, .addImm(LabelOffset) .addReg(BufReg); } - MIB.setMemRefs(MMOBegin, MMOEnd); + MIB.cloneMemRefs(MI); // Reload SP if (PVT == MVT::i64) { @@ -10176,7 +10308,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, .addImm(SPOffset) .addReg(BufReg); } - MIB.setMemRefs(MMOBegin, MMOEnd); + MIB.cloneMemRefs(MI); // Reload BP if (PVT == MVT::i64) { @@ -10188,16 +10320,15 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, .addImm(BPOffset) .addReg(BufReg); } - MIB.setMemRefs(MMOBegin, MMOEnd); + MIB.cloneMemRefs(MI); // Reload TOC if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { setUsesTOCBasePtr(*MBB->getParent()); MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) - .addImm(TOCOffset) - .addReg(BufReg); - - MIB.setMemRefs(MMOBegin, MMOEnd); + 
.addImm(TOCOffset) + .addReg(BufReg) + .cloneMemRefs(MI); } // Jump @@ -10221,7 +10352,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // way to mark the dependence as implicit there, and so the stackmap code // will confuse it with a regular operand. Instead, add the dependence // here. - setUsesTOCBasePtr(*BB->getParent()); MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true)); } @@ -10246,8 +10376,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineFunction *F = BB->getParent(); if (MI.getOpcode() == PPC::SELECT_CC_I4 || - MI.getOpcode() == PPC::SELECT_CC_I8 || - MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) { + MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 || + MI.getOpcode() == PPC::SELECT_I8) { SmallVector<MachineOperand, 2> Cond; if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8) @@ -10392,9 +10522,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) - .addReg(HiReg).addReg(ReadAgainReg); + .addReg(HiReg) + .addReg(ReadAgainReg); BuildMI(BB, dl, TII->get(PPC::BCC)) - .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB); + .addImm(PPC::PRED_NE) + .addReg(CmpReg) + .addMBB(readMBB); BB->addSuccessor(readMBB); BB->addSuccessor(sinkMBB); @@ -10564,27 +10697,35 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // st[bhwd]cx. dest, ptr // exitBB: BB = loop1MBB; - BuildMI(BB, dl, TII->get(LoadMnemonic), dest) - .addReg(ptrA).addReg(ptrB); + BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(is64bit ? 
PPC::CMPD : PPC::CMPW), PPC::CR0) - .addReg(oldval).addReg(dest); + .addReg(oldval) + .addReg(dest); BuildMI(BB, dl, TII->get(PPC::BCC)) - .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); + .addImm(PPC::PRED_NE) + .addReg(PPC::CR0) + .addMBB(midMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(midMBB); BB = loop2MBB; BuildMI(BB, dl, TII->get(StoreMnemonic)) - .addReg(newval).addReg(ptrA).addReg(ptrB); + .addReg(newval) + .addReg(ptrA) + .addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) - .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); + .addImm(PPC::PRED_NE) + .addReg(PPC::CR0) + .addMBB(loop1MBB); BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); BB->addSuccessor(loop1MBB); BB->addSuccessor(exitMBB); BB = midMBB; BuildMI(BB, dl, TII->get(StoreMnemonic)) - .addReg(dest).addReg(ptrA).addReg(ptrB); + .addReg(dest) + .addReg(ptrA) + .addReg(ptrB); BB->addSuccessor(exitMBB); // exitMBB: @@ -10619,24 +10760,26 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); - const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass - : &PPC::GPRCRegClass; + const TargetRegisterClass *RC = + is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + unsigned PtrReg = RegInfo.createVirtualRegister(RC); - unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); + unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC); unsigned ShiftReg = - isLittleEndian ? 
Shift1Reg : RegInfo.createVirtualRegister(RC); - unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); - unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); - unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); - unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); - unsigned MaskReg = RegInfo.createVirtualRegister(RC); - unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); - unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); - unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); - unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); - unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); + isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC); + unsigned NewVal2Reg = RegInfo.createVirtualRegister(GPRC); + unsigned NewVal3Reg = RegInfo.createVirtualRegister(GPRC); + unsigned OldVal2Reg = RegInfo.createVirtualRegister(GPRC); + unsigned OldVal3Reg = RegInfo.createVirtualRegister(GPRC); + unsigned MaskReg = RegInfo.createVirtualRegister(GPRC); + unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC); + unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC); + unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC); + unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC); + unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC); unsigned Ptr1Reg; - unsigned TmpReg = RegInfo.createVirtualRegister(RC); + unsigned TmpReg = RegInfo.createVirtualRegister(GPRC); unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; // thisMBB: // ... @@ -10673,74 +10816,107 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, if (ptrA != ZeroReg) { Ptr1Reg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) - .addReg(ptrA).addReg(ptrB); + .addReg(ptrA) + .addReg(ptrB); } else { Ptr1Reg = ptrB; } - BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) - .addImm(3).addImm(27).addImm(is8bit ? 
28 : 27); + + // We need use 32-bit subregister to avoid mismatch register class in 64-bit + // mode. + BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg) + .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0) + .addImm(3) + .addImm(27) + .addImm(is8bit ? 28 : 27); if (!isLittleEndian) - BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) - .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); + BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg) + .addReg(Shift1Reg) + .addImm(is8bit ? 24 : 16); if (is64bit) BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) - .addReg(Ptr1Reg).addImm(0).addImm(61); + .addReg(Ptr1Reg) + .addImm(0) + .addImm(61); else BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) - .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); + .addReg(Ptr1Reg) + .addImm(0) + .addImm(0) + .addImm(29); BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) - .addReg(newval).addReg(ShiftReg); + .addReg(newval) + .addReg(ShiftReg); BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) - .addReg(oldval).addReg(ShiftReg); + .addReg(oldval) + .addReg(ShiftReg); if (is8bit) BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); else { BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) - .addReg(Mask3Reg).addImm(65535); + .addReg(Mask3Reg) + .addImm(65535); } BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) - .addReg(Mask2Reg).addReg(ShiftReg); + .addReg(Mask2Reg) + .addReg(ShiftReg); BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) - .addReg(NewVal2Reg).addReg(MaskReg); + .addReg(NewVal2Reg) + .addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) - .addReg(OldVal2Reg).addReg(MaskReg); + .addReg(OldVal2Reg) + .addReg(MaskReg); BB = loop1MBB; BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) - .addReg(ZeroReg).addReg(PtrReg); - BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) - .addReg(TmpDestReg).addReg(MaskReg); + .addReg(ZeroReg) + .addReg(PtrReg); + BuildMI(BB, dl, TII->get(PPC::AND), TmpReg) + .addReg(TmpDestReg) + 
.addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) - .addReg(TmpReg).addReg(OldVal3Reg); + .addReg(TmpReg) + .addReg(OldVal3Reg); BuildMI(BB, dl, TII->get(PPC::BCC)) - .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); + .addImm(PPC::PRED_NE) + .addReg(PPC::CR0) + .addMBB(midMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(midMBB); BB = loop2MBB; - BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg) - .addReg(TmpDestReg).addReg(MaskReg); - BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) - .addReg(Tmp2Reg).addReg(NewVal3Reg); - BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) - .addReg(ZeroReg).addReg(PtrReg); + BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg) + .addReg(TmpDestReg) + .addReg(MaskReg); + BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg) + .addReg(Tmp2Reg) + .addReg(NewVal3Reg); + BuildMI(BB, dl, TII->get(PPC::STWCX)) + .addReg(Tmp4Reg) + .addReg(ZeroReg) + .addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) - .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); + .addImm(PPC::PRED_NE) + .addReg(PPC::CR0) + .addMBB(loop1MBB); BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); BB->addSuccessor(loop1MBB); BB->addSuccessor(exitMBB); BB = midMBB; - BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) - .addReg(ZeroReg).addReg(PtrReg); + BuildMI(BB, dl, TII->get(PPC::STWCX)) + .addReg(TmpDestReg) + .addReg(ZeroReg) + .addReg(PtrReg); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; - BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) - .addReg(ShiftReg); + BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest) + .addReg(TmpReg) + .addReg(ShiftReg); } else if (MI.getOpcode() == PPC::FADDrtz) { // This pseudo performs an FADD with rounding mode temporarily forced // to round-to-zero. 
We emit this via custom inserter since the FPSCR @@ -10777,9 +10953,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8); MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? - &PPC::GPRCRegClass : - &PPC::G8RCRegClass); + unsigned Dest = RegInfo.createVirtualRegister( + Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass); DebugLoc dl = MI.getDebugLoc(); BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) @@ -11231,9 +11406,8 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, } else { // This is neither a signed nor an unsigned comparison, just make sure // that the high bits are equal. - KnownBits Op1Known, Op2Known; - DAG.computeKnownBits(N->getOperand(0), Op1Known); - DAG.computeKnownBits(N->getOperand(1), Op2Known); + KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0)); + KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1)); // We don't really care about what is known about the first bit (if // anything), so clear it in all masks prior to comparing them. @@ -11750,6 +11924,37 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, ShiftCst); } +SDValue PPCTargetLowering::combineSetCC(SDNode *N, + DAGCombinerInfo &DCI) const { + assert(N->getOpcode() == ISD::SETCC && + "Should be called with a SETCC node"); + + ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); + if (CC == ISD::SETNE || CC == ISD::SETEQ) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + // If there is a '0 - y' pattern, canonicalize the pattern to the RHS. 
+ if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) && + LHS.hasOneUse()) + std::swap(LHS, RHS); + + // x == 0-y --> x+y == 0 + // x != 0-y --> x+y != 0 + if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) && + RHS.hasOneUse()) { + SDLoc DL(N); + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + EVT OpVT = LHS.getValueType(); + SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1)); + return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC); + } + } + + return DAGCombineTruncBoolExt(N, DCI); +} + // Is this an extending load from an f32 to an f64? static bool isFPExtLoad(SDValue Op) { if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode())) @@ -11869,7 +12074,8 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) { IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD; } // Not a build vector of (possibly fp_rounded) loads. - if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) + if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) || + N->getNumOperands() == 1) return SDValue(); for (int i = 1, e = N->getNumOperands(); i < e; ++i) { @@ -12450,6 +12656,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, SDLoc dl(N); switch (N->getOpcode()) { default: break; + case ISD::ADD: + return combineADD(N, DCI); case ISD::SHL: return combineSHL(N, DCI); case ISD::SRA: @@ -12476,7 +12684,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, case ISD::ANY_EXTEND: return DAGCombineExtBoolTrunc(N, DCI); case ISD::TRUNCATE: + return combineTRUNCATE(N, DCI); case ISD::SETCC: + if (SDValue CSCC = combineSetCC(N, DCI)) + return CSCC; + LLVM_FALLTHROUGH; case ISD::SELECT_CC: return DAGCombineTruncBoolExt(N, DCI); case ISD::SINT_TO_FP: @@ -12499,9 +12711,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, (Op1VT == MVT::i32 || Op1VT == MVT::i16 || (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) { - // STBRX can only 
handle simple types. + // STBRX can only handle simple types and it makes no sense to store less + // two bytes in byte-reversed order. EVT mVT = cast<StoreSDNode>(N)->getMemoryVT(); - if (mVT.isExtended()) + if (mVT.isExtended() || mVT.getSizeInBits() < 16) break; SDValue BSwapOp = N->getOperand(1).getOperand(0); @@ -12877,6 +13090,39 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } } } + + // Combine vmaxsw/h/b(a, a's negation) to abs(a) + // Expose the vabsduw/h/b opportunity for down stream + if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() && + (IID == Intrinsic::ppc_altivec_vmaxsw || + IID == Intrinsic::ppc_altivec_vmaxsh || + IID == Intrinsic::ppc_altivec_vmaxsb)) { + SDValue V1 = N->getOperand(1); + SDValue V2 = N->getOperand(2); + if ((V1.getSimpleValueType() == MVT::v4i32 || + V1.getSimpleValueType() == MVT::v8i16 || + V1.getSimpleValueType() == MVT::v16i8) && + V1.getSimpleValueType() == V2.getSimpleValueType()) { + // (0-a, a) + if (V1.getOpcode() == ISD::SUB && + ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) && + V1.getOperand(1) == V2) { + return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2); + } + // (a, 0-a) + if (V2.getOpcode() == ISD::SUB && + ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) && + V2.getOperand(1) == V1) { + return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1); + } + // (x-y, y-x) + if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB && + V1.getOperand(0) == V2.getOperand(1) && + V1.getOperand(1) == V2.getOperand(0)) { + return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1); + } + } + } } break; @@ -13109,6 +13355,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } case ISD::BUILD_VECTOR: return DAGCombineBuildVector(N, DCI); + case ISD::ABS: + return combineABS(N, DCI); + case ISD::VSELECT: + return combineVSelect(N, DCI); } return SDValue(); @@ -13251,7 +13501,8 @@ PPCTargetLowering::getConstraintType(StringRef Constraint) const { } else if (Constraint == "wc") { // 
individual CR bits. return C_RegisterClass; } else if (Constraint == "wa" || Constraint == "wd" || - Constraint == "wf" || Constraint == "ws") { + Constraint == "wf" || Constraint == "ws" || + Constraint == "wi") { return C_RegisterClass; // VSX registers. } return TargetLowering::getConstraintType(Constraint); @@ -13281,6 +13532,8 @@ PPCTargetLowering::getSingleConstraintMatchWeight( return CW_Register; else if (StringRef(constraint) == "ws" && type->isDoubleTy()) return CW_Register; + else if (StringRef(constraint) == "wi" && type->isIntegerTy(64)) + return CW_Register; // just hold 64-bit integers data. switch (*constraint) { default: @@ -13363,7 +13616,8 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // An individual CR bit. return std::make_pair(0U, &PPC::CRBITRCRegClass); } else if ((Constraint == "wa" || Constraint == "wd" || - Constraint == "wf") && Subtarget.hasVSX()) { + Constraint == "wf" || Constraint == "wi") && + Subtarget.hasVSX()) { return std::make_pair(0U, &PPC::VSRCRegClass); } else if (Constraint == "ws" && Subtarget.hasVSX()) { if (VT == MVT::f32 && Subtarget.hasP8Vector()) @@ -13598,6 +13852,35 @@ unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, report_fatal_error("Invalid register name global variable"); } +bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const { + // 32-bit SVR4 ABI access everything as got-indirect. + if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) + return true; + + CodeModel::Model CModel = getTargetMachine().getCodeModel(); + // If it is small or large code model, module locals are accessed + // indirectly by loading their address from .toc/.got. The difference + // is that for large code model we have ADDISTocHa + LDtocL and for + // small code model we simply have LDtoc. + if (CModel == CodeModel::Small || CModel == CodeModel::Large) + return true; + + // JumpTable and BlockAddress are accessed as got-indirect. 
+ if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA)) + return true; + + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { + const GlobalValue *GV = G->getGlobal(); + unsigned char GVFlags = Subtarget.classifyGlobalReference(GV); + // The NLP flag indicates that a global access has to use an + // extra indirection. + if (GVFlags & PPCII::MO_NLP_FLAG) + return true; + } + + return false; +} + bool PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The PowerPC target isn't yet aware of offsets. @@ -14116,7 +14399,30 @@ SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const { if (auto Value = stripModuloOnShift(*this, N, DCI.DAG)) return Value; - return SDValue(); + SDValue N0 = N->getOperand(0); + ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!Subtarget.isISA3_0() || + N0.getOpcode() != ISD::SIGN_EXTEND || + N0.getOperand(0).getValueType() != MVT::i32 || + CN1 == nullptr || N->getValueType(0) != MVT::i64) + return SDValue(); + + // We can't save an operation here if the value is already extended, and + // the existing shift is easier to combine. + SDValue ExtsSrc = N0.getOperand(0); + if (ExtsSrc.getOpcode() == ISD::TRUNCATE && + ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext) + return SDValue(); + + SDLoc DL(N0); + SDValue ShiftBy = SDValue(CN1, 0); + // We want the shift amount to be i32 on the extswli, but the shift could + // have an i64. 
+ if (ShiftBy.getValueType() == MVT::i64) + ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32); + + return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0), + ShiftBy); } SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const { @@ -14133,6 +14439,152 @@ SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const { return SDValue(); } +// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1)) +// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0)) +// When C is zero, the equation (addi Z, -C) can be simplified to Z +// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types +static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) { + if (!Subtarget.isPPC64()) + return SDValue(); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + auto isZextOfCompareWithConstant = [](SDValue Op) { + if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() || + Op.getValueType() != MVT::i64) + return false; + + SDValue Cmp = Op.getOperand(0); + if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() || + Cmp.getOperand(0).getValueType() != MVT::i64) + return false; + + if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) { + int64_t NegConstant = 0 - Constant->getSExtValue(); + // Due to the limitations of the addi instruction, + // -C is required to be [-32768, 32767]. + return isInt<16>(NegConstant); + } + + return false; + }; + + bool LHSHasPattern = isZextOfCompareWithConstant(LHS); + bool RHSHasPattern = isZextOfCompareWithConstant(RHS); + + // If there is a pattern, canonicalize a zext operand to the RHS. 
+ if (LHSHasPattern && !RHSHasPattern) + std::swap(LHS, RHS); + else if (!LHSHasPattern && !RHSHasPattern) + return SDValue(); + + SDLoc DL(N); + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue); + SDValue Cmp = RHS.getOperand(0); + SDValue Z = Cmp.getOperand(0); + auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1)); + + assert(Constant && "Constant Should not be a null pointer."); + int64_t NegConstant = 0 - Constant->getSExtValue(); + + switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) { + default: break; + case ISD::SETNE: { + // when C == 0 + // --> addze X, (addic Z, -1).carry + // / + // add X, (zext(setne Z, C))-- + // \ when -32768 <= -C <= 32767 && C != 0 + // --> addze X, (addic (addi Z, -C), -1).carry + SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z, + DAG.getConstant(NegConstant, DL, MVT::i64)); + SDValue AddOrZ = NegConstant != 0 ? Add : Z; + SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue), + AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64)); + return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64), + SDValue(Addc.getNode(), 1)); + } + case ISD::SETEQ: { + // when C == 0 + // --> addze X, (subfic Z, 0).carry + // / + // add X, (zext(sete Z, C))-- + // \ when -32768 <= -C <= 32767 && C != 0 + // --> addze X, (subfic (addi Z, -C), 0).carry + SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z, + DAG.getConstant(NegConstant, DL, MVT::i64)); + SDValue AddOrZ = NegConstant != 0 ? 
Add : Z; + SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue), + DAG.getConstant(0, DL, MVT::i64), AddOrZ); + return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64), + SDValue(Subc.getNode(), 1)); + } + } + + return SDValue(); +} + +SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const { + if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget)) + return Value; + + return SDValue(); +} + +// Detect TRUNCATE operations on bitcasts of float128 values. +// What we are looking for here is the situation where we extract a subset +// of bits from a 128 bit float. +// This can be of two forms: +// 1) BITCAST of f128 feeding TRUNCATE +// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE +// The reason this is required is because we do not have a legal i128 type +// and so we want to prevent having to store the f128 and then reload part +// of it. +SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N, + DAGCombinerInfo &DCI) const { + // If we are using CRBits then try that first. + if (Subtarget.useCRBits()) { + // Check if CRBits did anything and return that if it did. + if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI)) + return CRTruncValue; + } + + SDLoc dl(N); + SDValue Op0 = N->getOperand(0); + + // Looking for a truncate of i128 to i64. + if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64) + return SDValue(); + + int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0; + + // SRL feeding TRUNCATE. + if (Op0.getOpcode() == ISD::SRL) { + ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1)); + // The right shift has to be by 64 bits. + if (!ConstNode || ConstNode->getZExtValue() != 64) + return SDValue(); + + // Switch the element number to extract. + EltToExtract = EltToExtract ? 0 : 1; + // Update Op0 past the SRL. + Op0 = Op0.getOperand(0); + } + + // BITCAST feeding a TRUNCATE possibly via SRL. 
+ if (Op0.getOpcode() == ISD::BITCAST && + Op0.getValueType() == MVT::i128 && + Op0.getOperand(0).getValueType() == MVT::f128) { + SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0)); + return DCI.DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast, + DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32)); + } + return SDValue(); +} + bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { // Only duplicate to increase tail-calls for the 64bit SysV ABIs. if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64()) @@ -14168,6 +14620,15 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee); } +bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const { + if (!Subtarget.hasVSX()) + return false; + if (Subtarget.hasP9Vector() && VT == MVT::f128) + return true; + return VT == MVT::f32 || VT == MVT::f64 || + VT == MVT::v4f32 || VT == MVT::v2f64; +} + bool PPCTargetLowering:: isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { const Value *Mask = AndI.getOperand(1); @@ -14184,3 +14645,109 @@ isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { // For non-constant masks, we can always use the record-form and. 
return true; } + +// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0) +// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0) +// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0) +// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0) +// Transform (abs (sub a, b)) to (vabsd a b 1) if a & b are of type v4i32 +SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const { + assert((N->getOpcode() == ISD::ABS) && "Need ABS node here"); + assert(Subtarget.hasP9Altivec() && + "Only combine this when P9 altivec supported!"); + EVT VT = N->getValueType(0); + if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(N); + if (N->getOperand(0).getOpcode() == ISD::SUB) { + // Even for signed integers, if it's known to be positive (as signed + // integer) due to zero-extended inputs. + unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode(); + unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode(); + if ((SubOpcd0 == ISD::ZERO_EXTEND || + SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) && + (SubOpcd1 == ISD::ZERO_EXTEND || + SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) { + return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(), + N->getOperand(0)->getOperand(0), + N->getOperand(0)->getOperand(1), + DAG.getTargetConstant(0, dl, MVT::i32)); + } + + // For type v4i32, it can be optimized with xvnegsp + vabsduw + if (N->getOperand(0).getValueType() == MVT::v4i32 && + N->getOperand(0).hasOneUse()) { + return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(), + N->getOperand(0)->getOperand(0), + N->getOperand(0)->getOperand(1), + DAG.getTargetConstant(1, dl, MVT::i32)); + } + } + + return SDValue(); +} + +// For type v4i32/v8i16/v16i8, transform +// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b) +// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b) +// 
from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b) +// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b) +SDValue PPCTargetLowering::combineVSelect(SDNode *N, + DAGCombinerInfo &DCI) const { + assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here"); + assert(Subtarget.hasP9Altivec() && + "Only combine this when P9 altivec supported!"); + + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(N); + SDValue Cond = N->getOperand(0); + SDValue TrueOpnd = N->getOperand(1); + SDValue FalseOpnd = N->getOperand(2); + EVT VT = N->getOperand(1).getValueType(); + + if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB || + FalseOpnd.getOpcode() != ISD::SUB) + return SDValue(); + + // ABSD only available for type v4i32/v8i16/v16i8 + if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8) + return SDValue(); + + // At least to save one more dependent computation + if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse())) + return SDValue(); + + ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); + + // Can only handle unsigned comparison here + switch (CC) { + default: + return SDValue(); + case ISD::SETUGT: + case ISD::SETUGE: + break; + case ISD::SETULT: + case ISD::SETULE: + std::swap(TrueOpnd, FalseOpnd); + break; + } + + SDValue CmpOpnd1 = Cond.getOperand(0); + SDValue CmpOpnd2 = Cond.getOperand(1); + + // SETCC CmpOpnd1 CmpOpnd2 cond + // TrueOpnd = CmpOpnd1 - CmpOpnd2 + // FalseOpnd = CmpOpnd2 - CmpOpnd1 + if (TrueOpnd.getOperand(0) == CmpOpnd1 && + TrueOpnd.getOperand(1) == CmpOpnd2 && + FalseOpnd.getOperand(0) == CmpOpnd2 && + FalseOpnd.getOperand(1) == CmpOpnd1) { + return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(), + CmpOpnd1, CmpOpnd2, + DAG.getTargetConstant(0, dl, MVT::i32)); + } + + return SDValue(); +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h index 
f174943a8004..30acd60eba6f 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -149,6 +149,10 @@ namespace llvm { /// For vector types, only the last n bits are used. See vsld. SRL, SRA, SHL, + /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign + /// word and shift left immediate. + EXTSWSLI, + /// The combination of sra[wd]i and addze used to implemented signed /// integer division by a power of 2. The first operand is the dividend, /// and the second is the constant shift amount (representing the @@ -369,6 +373,21 @@ namespace llvm { /// An SDNode for swaps that are not associated with any loads/stores /// and thereby have no chain. SWAP_NO_CHAIN, + + /// An SDNode for Power9 vector absolute value difference. + /// operand #0 vector + /// operand #1 vector + /// operand #2 constant i32 0 or 1, to indicate whether needs to patch + /// the most significant bit for signed i32 + /// + /// Power9 VABSD* instructions are designed to support unsigned integer + /// vectors (byte/halfword/word), if we want to make use of them for signed + /// integer vectors, we have to flip their sign bits first. To flip sign bit + /// for byte/halfword integer vector would become inefficient, but for word + /// integer vector, we can leverage XVNEGSP to make it efficiently. eg: + /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000) + /// => VABSDUW((XVNEGSP a), (XVNEGSP b)) + VABSD, /// QVFPERM = This corresponds to the QPX qvfperm instruction. QVFPERM, @@ -557,6 +576,11 @@ namespace llvm { /// DAG node. const char *getTargetNodeName(unsigned Opcode) const override; + bool isSelectSupported(SelectSupportKind Kind) const override { + // PowerPC does not support scalar condition selects on vectors. 
+ return (Kind != SelectSupportKind::ScalarCondVectorVal); + } + /// getPreferredVectorAction - The code we generate when vector types are /// legalized by promoting the integer element type is often much worse /// than code we generate if we widen the type for applicable vector types. @@ -565,7 +589,7 @@ namespace llvm { /// of v4i8's and shuffle them. This will turn into a mess of 8 extending /// loads, moves back into VSR's (or memory ops if we don't have moves) and /// then the VPERM for the shuffle. All in all a very slow sequence. - TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) + TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override { if (VT.getScalarSizeInBits() % 8 == 0) return TypeWidenVector; @@ -785,6 +809,9 @@ namespace llvm { return true; } + // Returns true if the address of the global is stored in TOC entry. + bool isAccessedAsGotIndirect(SDValue N) const; + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; bool getTgtMemIntrinsic(IntrinsicInfo &Info, @@ -923,6 +950,9 @@ namespace llvm { SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const; + SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG, + const SDLoc &dl) const; + SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; @@ -988,6 +1018,7 @@ namespace llvm { SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const; @@ -1088,6 +1119,11 @@ namespace llvm { SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSRL(SDNode *N, 
DAGCombinerInfo &DCI) const; + SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const; /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces /// SETCC with integer subtraction when (1) there is a legal way of doing it @@ -1122,6 +1158,7 @@ namespace llvm { // tail call. This will cause the optimizers to attempt to move, or // duplicate return instructions to help enable tail call optimizations. bool mayBeEmittedAsTailCall(const CallInst *CI) const override; + bool hasBitPreservingFPLogic(EVT VT) const override; bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; }; // end class PPCTargetLowering diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index cdd57c6a1118..2ce6ad3293eb 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -94,7 +94,7 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { } let Defs = [LR8] in - def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>, + def MovePCtoLR8 : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR8", []>, PPC970_Unit_BRU; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { @@ -199,47 +199,45 @@ def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), // clean this up in PPCMIPeephole with calls to // PPCInstrInfo::convertToImmediateForm() but we should probably not emit them // in the first place. 
-let usesCustomInserter = 1 in { - let Defs = [CR0] in { - def ATOMIC_LOAD_ADD_I64 : Pseudo< - (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_ADD_I64", - [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>; - def ATOMIC_LOAD_SUB_I64 : Pseudo< - (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_SUB_I64", - [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>; - def ATOMIC_LOAD_OR_I64 : Pseudo< - (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_OR_I64", - [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>; - def ATOMIC_LOAD_XOR_I64 : Pseudo< - (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_XOR_I64", - [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>; - def ATOMIC_LOAD_AND_I64 : Pseudo< - (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_AND_i64", - [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>; - def ATOMIC_LOAD_NAND_I64 : Pseudo< - (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64", - [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>; - def ATOMIC_LOAD_MIN_I64 : Pseudo< - (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MIN_I64", - [(set i64:$dst, (atomic_load_min_64 xoaddr:$ptr, i64:$incr))]>; - def ATOMIC_LOAD_MAX_I64 : Pseudo< - (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MAX_I64", - [(set i64:$dst, (atomic_load_max_64 xoaddr:$ptr, i64:$incr))]>; - def ATOMIC_LOAD_UMIN_I64 : Pseudo< - (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMIN_I64", - [(set i64:$dst, (atomic_load_umin_64 xoaddr:$ptr, i64:$incr))]>; - def ATOMIC_LOAD_UMAX_I64 : Pseudo< - (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMAX_I64", - [(set i64:$dst, (atomic_load_umax_64 xoaddr:$ptr, i64:$incr))]>; - - def ATOMIC_CMP_SWAP_I64 : Pseudo< - (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64", - [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, 
i64:$old, i64:$new))]>; - - def ATOMIC_SWAP_I64 : Pseudo< - (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64", - [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>; - } +let Defs = [CR0] in { + def ATOMIC_LOAD_ADD_I64 : PPCCustomInserterPseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_ADD_I64", + [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_SUB_I64 : PPCCustomInserterPseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_SUB_I64", + [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_OR_I64 : PPCCustomInserterPseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_OR_I64", + [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_XOR_I64 : PPCCustomInserterPseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_XOR_I64", + [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_AND_I64 : PPCCustomInserterPseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_AND_i64", + [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_NAND_I64 : PPCCustomInserterPseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64", + [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_MIN_I64 : PPCCustomInserterPseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MIN_I64", + [(set i64:$dst, (atomic_load_min_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_MAX_I64 : PPCCustomInserterPseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MAX_I64", + [(set i64:$dst, (atomic_load_max_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_UMIN_I64 : PPCCustomInserterPseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMIN_I64", + [(set i64:$dst, (atomic_load_umin_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_UMAX_I64 : 
PPCCustomInserterPseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMAX_I64", + [(set i64:$dst, (atomic_load_umax_64 xoaddr:$ptr, i64:$incr))]>; + + def ATOMIC_CMP_SWAP_I64 : PPCCustomInserterPseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64", + [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>; + + def ATOMIC_SWAP_I64 : PPCCustomInserterPseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64", + [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>; } // Instructions to support atomic operations @@ -269,18 +267,18 @@ def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC), let Interpretation64Bit = 1, isCodeGenOnly = 1 in { let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in -def TCRETURNdi8 :Pseudo< (outs), +def TCRETURNdi8 :PPCEmitTimePseudo< (outs), (ins calltarget:$dst, i32imm:$offset), "#TC_RETURNd8 $dst $offset", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in -def TCRETURNai8 :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset), +def TCRETURNai8 :PPCEmitTimePseudo<(outs), (ins abscalltarget:$func, i32imm:$offset), "#TC_RETURNa8 $func $offset", [(PPCtc_return (i64 imm:$func), imm:$offset)]>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in -def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset), +def TCRETURNri8 : PPCEmitTimePseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset), "#TC_RETURNr8 $dst $offset", []>; @@ -347,14 +345,19 @@ def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins), } // hasExtraSrcRegAllocReq = 1 } // hasSideEffects = 0 -let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { +// While longjmp is a control-flow barrier (fallthrough isn't allowed), setjmp +// is not. 
+let hasSideEffects = 1 in { let Defs = [CTR8] in - def EH_SjLj_SetJmp64 : Pseudo<(outs gprc:$dst), (ins memr:$buf), + def EH_SjLj_SetJmp64 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf), "#EH_SJLJ_SETJMP64", [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, Requires<[In64BitMode]>; +} + +let hasSideEffects = 1, isBarrier = 1 in { let isTerminator = 1 in - def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf), + def EH_SjLj_LongJmp64 : PPCCustomInserterPseudo<(outs), (ins memr:$buf), "#EH_SJLJ_LONGJMP64", [(PPCeh_sjlj_longjmp addr:$buf)]>, Requires<[In64BitMode]>; @@ -396,10 +399,10 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins), // the POWER3. let Defs = [X1], Uses = [X1] in -def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8", +def DYNALLOC8 : PPCEmitTimePseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8", [(set i64:$result, (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>; -def DYNAREAOFFSET8 : Pseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8", +def DYNAREAOFFSET8 : PPCEmitTimePseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8", [(set i64:$result, (PPCdynareaoffset iaddr:$fpsi))]>; let Defs = [LR8] in { @@ -717,9 +720,10 @@ defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH), "sradi", "$rA, $rS, $SH", IIC_IntRotateDI, [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64; -defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH), +defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH), "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI, - []>, isPPC64; + [(set i64:$rA, (PPCextswsli i32:$rS, (i32 imm:$SH)))]>, + isPPC64, Requires<[IsISA3_0]>; // For fast-isel: let isCodeGenOnly = 1, Defs = [CARRY] in @@ -773,8 +777,12 @@ def MADDHDU : VAForm_1a<49, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC) "maddhdu $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64; def MADDLD : VAForm_1a<51, 
(outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC), "maddld $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64; -def SETB : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA), - "setb $RT, $BFA", IIC_IntGeneral>, isPPC64; +def SETB : XForm_44<31, 128, (outs gprc:$RT), (ins crrc:$BFA), + "setb $RT, $BFA", IIC_IntGeneral>, isPPC64; +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { + def SETB8 : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA), + "setb $RT, $BFA", IIC_IntGeneral>, isPPC64; +} def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins i32imm:$L), "darn $RT, $L", IIC_LdStLD>, isPPC64; def ADDPCIS : DXForm<19, 2, (outs g8rc:$RT), (ins i32imm:$D), @@ -1018,19 +1026,19 @@ def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src), // The following four definitions are selected for small code model only. // Otherwise, we need to create two instructions to form a 32-bit offset, // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select(). -def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), +def LDtoc: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), "#LDtoc", [(set i64:$rD, (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64; -def LDtocJTI: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), +def LDtocJTI: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), "#LDtocJTI", [(set i64:$rD, (PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64; -def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), +def LDtocCPT: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), "#LDtocCPT", [(set i64:$rD, (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64; -def LDtocBA: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), +def LDtocBA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), "#LDtocCPT", [(set i64:$rD, (PPCtoc_entry tblockaddress:$disp, i64:$reg))]>, isPPC64; @@ -1071,40 +1079,40 @@ def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins 
memrr:$src), // Support for medium and large code model. let hasSideEffects = 0 in { let isReMaterializable = 1 in { -def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), +def ADDIStocHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), "#ADDIStocHA", []>, isPPC64; -def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), +def ADDItocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), "#ADDItocL", []>, isPPC64; } let mayLoad = 1 in -def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg), +def LDtocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg), "#LDtocL", []>, isPPC64; } // Support for thread-local storage. -def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), +def ADDISgotTprelHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDISgotTprelHA", [(set i64:$rD, (PPCaddisGotTprelHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg), +def LDgotTprelL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg), "#LDgotTprelL", [(set i64:$rD, (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>, isPPC64; -let isPseudo = 1, Defs = [CR7], Itinerary = IIC_LdStSync in -def CFENCE8 : Pseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>; +let Defs = [CR7], Itinerary = IIC_LdStSync in +def CFENCE8 : PPCPostRAExpPseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>; def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g), (ADD8TLS $in, tglobaltlsaddr:$g)>; -def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), +def ADDIStlsgdHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDIStlsgdHA", [(set i64:$rD, (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), +def ADDItlsgdL : 
PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDItlsgdL", [(set i64:$rD, (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>, @@ -1115,7 +1123,7 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), // correct because the branch select pass is relying on it. let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Size = 8, Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in -def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), +def GETtlsADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), "#GETtlsADDR", [(set i64:$rD, (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>, @@ -1125,7 +1133,7 @@ def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in -def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD), +def ADDItlsgdLADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym), "#ADDItlsgdLADDR", [(set i64:$rD, @@ -1133,12 +1141,12 @@ def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD), tglobaltlsaddr:$disp, tglobaltlsaddr:$sym))]>, isPPC64; -def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), +def ADDIStlsldHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDIStlsldHA", [(set i64:$rD, (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), +def ADDItlsldL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDItlsldL", [(set i64:$rD, (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>, @@ -1147,7 +1155,7 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), // explicitly defined when this op is created, so not mentioned here. 
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in -def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), +def GETtlsldADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), "#GETtlsldADDR", [(set i64:$rD, (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>, @@ -1157,7 +1165,7 @@ def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in -def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD), +def ADDItlsldLADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym), "#ADDItlsldLADDR", [(set i64:$rD, @@ -1165,13 +1173,13 @@ def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD), tglobaltlsaddr:$disp, tglobaltlsaddr:$sym))]>, isPPC64; -def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), +def ADDISdtprelHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDISdtprelHA", [(set i64:$rD, (PPCaddisDtprelHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), +def ADDIdtprelL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDIdtprelL", [(set i64:$rD, (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>, @@ -1221,30 +1229,30 @@ def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst), let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in { def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), - "stbu $rS, $dst", IIC_LdStStoreUpd, []>, + "stbu $rS, $dst", IIC_LdStSTU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), - "sthu $rS, $dst", IIC_LdStStoreUpd, []>, + "sthu $rS, 
$dst", IIC_LdStSTU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), - "stwu $rS, $dst", IIC_LdStStoreUpd, []>, + "stwu $rS, $dst", IIC_LdStSTU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STBUX8: XForm_8_memOp<31, 247, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), - "stbux $rS, $dst", IIC_LdStStoreUpd, []>, + "stbux $rS, $dst", IIC_LdStSTUX, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STHUX8: XForm_8_memOp<31, 439, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), - "sthux $rS, $dst", IIC_LdStStoreUpd, []>, + "sthux $rS, $dst", IIC_LdStSTUX, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STWUX8: XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), - "stwux $rS, $dst", IIC_LdStStoreUpd, []>, + "stwux $rS, $dst", IIC_LdStSTUX, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; @@ -1252,13 +1260,13 @@ def STWUX8: XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res), def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrix:$dst), - "stdu $rS, $dst", IIC_LdStSTDU, []>, + "stdu $rS, $dst", IIC_LdStSTU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">, isPPC64; def STDUX : XForm_8_memOp<31, 181, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), - "stdux $rS, $dst", IIC_LdStSTDUX, []>, + "stdux $rS, $dst", IIC_LdStSTUX, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked, isPPC64; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index 24969d7ef853..69b19e45c3e9 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -1051,6 +1051,20 @@ def : Pat<(v4f32 (ftrunc v4f32:$vA)), def : 
Pat<(v4f32 (fnearbyint v4f32:$vA)), (VRFIN $vA)>; +// Vector selection +def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)), + (VSEL $vC, $vB, $vA)>; +def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)), + (VSEL $vC, $vB, $vA)>; +def : Pat<(v4i32 (vselect v4i32:$vA, v4i32:$vB, v4i32:$vC)), + (VSEL $vC, $vB, $vA)>; +def : Pat<(v2i64 (vselect v2i64:$vA, v2i64:$vB, v2i64:$vC)), + (VSEL $vC, $vB, $vA)>; +def : Pat<(v4f32 (vselect v4i32:$vA, v4f32:$vB, v4f32:$vC)), + (VSEL $vC, $vB, $vA)>; +def : Pat<(v2f64 (vselect v2i64:$vA, v2f64:$vB, v2f64:$vC)), + (VSEL $vC, $vB, $vA)>; + } // end HasAltivec def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td index f5f4b46344cf..2fe765dd99e1 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -2153,7 +2153,9 @@ class Z23Form_8<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, } //===----------------------------------------------------------------------===// -class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern> +// EmitTimePseudo won't have encoding information for the [MC]CodeEmitter +// stuff +class PPCEmitTimePseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern> : I<0, OOL, IOL, asmstr, NoItinerary> { let isCodeGenOnly = 1; let PPC64 = 0; @@ -2162,6 +2164,21 @@ class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern> let hasNoSchedulingInfo = 1; } +// Instruction that require custom insertion support +// a.k.a. 
ISelPseudos, however, these won't have isPseudo set +class PPCCustomInserterPseudo<dag OOL, dag IOL, string asmstr, + list<dag> pattern> + : PPCEmitTimePseudo<OOL, IOL, asmstr, pattern> { + let usesCustomInserter = 1; +} + +// PostRAPseudo will be expanded in expandPostRAPseudo, isPseudo flag in td +// files is set only for PostRAPseudo +class PPCPostRAExpPseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern> + : PPCEmitTimePseudo<OOL, IOL, asmstr, pattern> { + let isPseudo = 1; +} + class PseudoXFormMemOp<dag OOL, dag IOL, string asmstr, list<dag> pattern> - : Pseudo<OOL, IOL, asmstr, pattern>, XFormMemOp; + : PPCPostRAExpPseudo<OOL, IOL, asmstr, pattern>, XFormMemOp; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td index 6c4e2129087c..0efe797c765d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td @@ -20,8 +20,8 @@ def HTM_get_imm : SDNodeXForm<imm, [{ return getI32Imm (N->getZExtValue(), SDLoc(N)); }]>; -let hasSideEffects = 1, usesCustomInserter = 1 in { -def TCHECK_RET : Pseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>; +let hasSideEffects = 1 in { +def TCHECK_RET : PPCCustomInserterPseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 0930f7d3b8d7..d754ce2990d2 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -987,7 +987,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = PPC::XXLOR; else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) || PPC::VSSRCRegClass.contains(DestReg, SrcReg)) - Opc = PPC::XXLORf; + Opc = (Subtarget.hasP9Vector()) ? 
PPC::XSCPSGNDP : PPC::XXLORf; else if (PPC::QFRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::QVFMR; else if (PPC::QSRCRegClass.contains(DestReg, SrcReg)) @@ -1429,17 +1429,15 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR))); } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) { MI.setDesc(get(PPC::BCLR)); - MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addReg(Pred[1].getReg()); + MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]); } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { MI.setDesc(get(PPC::BCLRn)); - MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addReg(Pred[1].getReg()); + MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]); } else { MI.setDesc(get(PPC::BCCLR)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) .addImm(Pred[0].getImm()) - .addReg(Pred[1].getReg()); + .add(Pred[1]); } return true; @@ -1454,7 +1452,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, MI.setDesc(get(PPC::BC)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addReg(Pred[1].getReg()) + .add(Pred[1]) .addMBB(MBB); } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); @@ -1462,7 +1460,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, MI.setDesc(get(PPC::BCn)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addReg(Pred[1].getReg()) + .add(Pred[1]) .addMBB(MBB); } else { MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); @@ -1471,13 +1469,13 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, MI.setDesc(get(PPC::BCC)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) .addImm(Pred[0].getImm()) - .addReg(Pred[1].getReg()) + .add(Pred[1]) .addMBB(MBB); } return true; - } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || - OpC == PPC::BCTRL || OpC == PPC::BCTRL8) { + } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL || + OpC == PPC::BCTRL8) { if 
(Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) llvm_unreachable("Cannot predicate bctr[l] on the ctr register"); @@ -1487,14 +1485,12 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, if (Pred[0].getImm() == PPC::PRED_BIT_SET) { MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) : (setLR ? PPC::BCCTRL : PPC::BCCTR))); - MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addReg(Pred[1].getReg()); + MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]); return true; } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n) : (setLR ? PPC::BCCTRLn : PPC::BCCTRn))); - MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addReg(Pred[1].getReg()); + MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]); return true; } @@ -1502,7 +1498,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, : (setLR ? PPC::BCCCTRL : PPC::BCCCTR))); MachineInstrBuilder(*MI.getParent()->getParent(), MI) .addImm(Pred[0].getImm()) - .addReg(Pred[1].getReg()); + .add(Pred[1]); return true; } @@ -1822,7 +1818,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, int NewOpC = -1; int MIOpC = MI->getOpcode(); - if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8) + if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8 || + MIOpC == PPC::ANDISo || MIOpC == PPC::ANDISo8) NewOpC = MIOpC; else { NewOpC = PPC::getRecordFormOpcode(MIOpC); @@ -1912,14 +1909,36 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, // compare). // Rotates are expensive instructions. If we're emitting a record-form - // rotate that can just be an andi, we should just emit the andi. - if ((MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) && - MI->getOperand(2).getImm() == 0) { + // rotate that can just be an andi/andis, we should just emit that. 
+ if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) { + unsigned GPRRes = MI->getOperand(0).getReg(); + int64_t SH = MI->getOperand(2).getImm(); int64_t MB = MI->getOperand(3).getImm(); int64_t ME = MI->getOperand(4).getImm(); - if (MB < ME && MB >= 16) { - uint64_t Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1); - NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIo : PPC::ANDIo8; + // We can only do this if both the start and end of the mask are in the + // same halfword. + bool MBInLoHWord = MB >= 16; + bool MEInLoHWord = ME >= 16; + uint64_t Mask = ~0LLU; + + if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) { + Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1); + // The mask value needs to shift right 16 if we're emitting andis. + Mask >>= MBInLoHWord ? 0 : 16; + NewOpC = MIOpC == PPC::RLWINM ? + (MBInLoHWord ? PPC::ANDIo : PPC::ANDISo) : + (MBInLoHWord ? PPC::ANDIo8 :PPC::ANDISo8); + } else if (MRI->use_empty(GPRRes) && (ME == 31) && + (ME - MB + 1 == SH) && (MB >= 16)) { + // If we are rotating by the exact number of bits as are in the mask + // and the mask is in the least significant bits of the register, + // that's just an andis. (as long as the GPR result has no uses). + Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1); + Mask >>= 16; + NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDISo :PPC::ANDISo8; + } + // If we've set the mask, we can transform. 
+ if (Mask != ~0LLU) { MI->RemoveOperand(4); MI->RemoveOperand(3); MI->getOperand(2).setImm(Mask); @@ -2088,11 +2107,9 @@ bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const { return true; } -#ifndef NDEBUG static bool isAnImmediateOperand(const MachineOperand &MO) { return MO.isCPI() || MO.isGlobal() || MO.isImm(); } -#endif bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { auto &MBB = *MI.getParent(); @@ -2231,6 +2248,35 @@ static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc, return PPC::NoRegister; } +void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI, + unsigned OpNo, + int64_t Imm) const { + assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG"); + // Replace the REG with the Immediate. + unsigned InUseReg = MI.getOperand(OpNo).getReg(); + MI.getOperand(OpNo).ChangeToImmediate(Imm); + + if (empty(MI.implicit_operands())) + return; + + // We need to make sure that the MI didn't have any implicit use + // of this REG any more. + const TargetRegisterInfo *TRI = &getRegisterInfo(); + int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, false, TRI); + if (UseOpIdx >= 0) { + MachineOperand &MO = MI.getOperand(UseOpIdx); + if (MO.isImplicit()) + // The operands must always be in the following order: + // - explicit reg defs, + // - other explicit operands (reg uses, immediates, etc.), + // - implicit reg defs + // - implicit reg uses + // Therefore, removing the implicit operand won't change the explicit + // operands layout. + MI.RemoveOperand(UseOpIdx); + } +} + // Replace an instruction with one that materializes a constant (and sets // CR0 if the original instruction was a record-form instruction). 
void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI, @@ -2256,10 +2302,11 @@ void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI, .addImm(LII.Imm); } -MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI, - unsigned &ConstOp, - bool &SeenIntermediateUse) const { - ConstOp = ~0U; +MachineInstr *PPCInstrInfo::getForwardingDefMI( + MachineInstr &MI, + unsigned &OpNoForForwarding, + bool &SeenIntermediateUse) const { + OpNoForForwarding = ~0U; MachineInstr *DefMI = nullptr; MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo(); const TargetRegisterInfo *TRI = &getRegisterInfo(); @@ -2276,7 +2323,7 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI, if (TargetRegisterInfo::isVirtualRegister(TrueReg)) { DefMI = MRI->getVRegDef(TrueReg); if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) { - ConstOp = i; + OpNoForForwarding = i; break; } } @@ -2297,7 +2344,7 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI, Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 || Opc == PPC::RLWINM || Opc == PPC::RLWINMo || Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o; - if (!instrHasImmForm(MI, III) && !ConvertibleImmForm) + if (!instrHasImmForm(MI, III, true) && !ConvertibleImmForm) return nullptr; // Don't convert or %X, %Y, %Y since that's just a register move. @@ -2319,15 +2366,22 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI, if (PPC::G8RCRegClass.contains(Reg)) Reg = Reg - PPC::X0 + PPC::R0; - // Is this register defined by a load-immediate in this block? + // Is this register defined by some form of add-immediate (including + // load-immediate) within this basic block? 
for ( ; It != E; ++It) { if (It->modifiesRegister(Reg, &getRegisterInfo())) { - if (It->getOpcode() == PPC::LI || It->getOpcode() == PPC::LI8) { - ConstOp = i; + switch (It->getOpcode()) { + default: break; + case PPC::LI: + case PPC::LI8: + case PPC::ADDItocL: + case PPC::ADDI: + case PPC::ADDI8: + OpNoForForwarding = i; return &*It; - } else - break; - } else if (It->readsRegister(Reg, &getRegisterInfo())) + } + break; + } else if (It->readsRegister(Reg, &getRegisterInfo())) // If we see another use of this reg between the def and the MI, // we want to flat it so the def isn't deleted. SeenIntermediateUse = true; @@ -2335,7 +2389,7 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI, } } } - return ConstOp == ~0U ? nullptr : DefMI; + return OpNoForForwarding == ~0U ? nullptr : DefMI; } const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const { @@ -2371,35 +2425,48 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const { } // If this instruction has an immediate form and one of its operands is a -// result of a load-immediate, convert it to the immediate form if the constant -// is in range. +// result of a load-immediate or an add-immediate, convert it to +// the immediate form if the constant is in range. 
bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, MachineInstr **KilledDef) const { MachineFunction *MF = MI.getParent()->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); bool PostRA = !MRI->isSSA(); bool SeenIntermediateUse = true; - unsigned ConstantOperand = ~0U; - MachineInstr *DefMI = getConstantDefMI(MI, ConstantOperand, - SeenIntermediateUse); - if (!DefMI || !DefMI->getOperand(1).isImm()) + unsigned ForwardingOperand = ~0U; + MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand, + SeenIntermediateUse); + if (!DefMI) + return false; + assert(ForwardingOperand < MI.getNumOperands() && + "The forwarding operand needs to be valid at this point"); + bool KillFwdDefMI = !SeenIntermediateUse && + MI.getOperand(ForwardingOperand).isKill(); + if (KilledDef && KillFwdDefMI) + *KilledDef = DefMI; + + ImmInstrInfo III; + bool HasImmForm = instrHasImmForm(MI, III, PostRA); + // If this is a reg+reg instruction that has a reg+imm form, + // and one of the operands is produced by an add-immediate, + // try to convert it. + if (HasImmForm && transformToImmFormFedByAdd(MI, III, ForwardingOperand, + *DefMI, KillFwdDefMI)) + return true; + + if ((DefMI->getOpcode() != PPC::LI && DefMI->getOpcode() != PPC::LI8) || + !DefMI->getOperand(1).isImm()) return false; - assert(ConstantOperand < MI.getNumOperands() && - "The constant operand needs to be valid at this point"); int64_t Immediate = DefMI->getOperand(1).getImm(); // Sign-extend to 64-bits. int64_t SExtImm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ? (Immediate | 0xFFFFFFFFFFFF0000) : Immediate; - if (KilledDef && MI.getOperand(ConstantOperand).isKill() && - !SeenIntermediateUse) - *KilledDef = DefMI; - - // If this is a reg+reg instruction that has a reg+imm form, convert it now. 
- ImmInstrInfo III; - if (instrHasImmForm(MI, III)) - return transformToImmForm(MI, III, ConstantOperand, SExtImm); + // If this is a reg+reg instruction that has a reg+imm form, + // and one of the operands is produced by LI, convert it now. + if (HasImmForm) + return transformToImmFormFedByLI(MI, III, ForwardingOperand, SExtImm); bool ReplaceWithLI = false; bool Is64BitLI = false; @@ -2443,7 +2510,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0. if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) { CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI)); - CompareUseMI.getOperand(1).ChangeToImmediate(0); + replaceInstrOperandWithImm(CompareUseMI, 1, 0); CompareUseMI.RemoveOperand(3); CompareUseMI.RemoveOperand(2); continue; @@ -2602,18 +2669,23 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, return false; } +static bool isVFReg(unsigned Reg) { + return PPC::VFRCRegClass.contains(Reg); +} + bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, - ImmInstrInfo &III) const { + ImmInstrInfo &III, bool PostRA) const { unsigned Opc = MI.getOpcode(); // The vast majority of the instructions would need their operand 2 replaced // with an immediate when switching to the reg+imm form. A marked exception // are the update form loads/stores for which a constant operand 2 would need // to turn into a displacement and move operand 1 to the operand 2 position. III.ImmOpNo = 2; - III.ConstantOpNo = 2; + III.OpNoForForwarding = 2; III.ImmWidth = 16; III.ImmMustBeMultipleOf = 1; III.TruncateImmTo = 0; + III.IsSummingOperands = false; switch (Opc) { default: return false; case PPC::ADD4: @@ -2622,6 +2694,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 1; III.IsCommutative = true; + III.IsSummingOperands = true; III.ImmOpcode = Opc == PPC::ADD4 ? 
PPC::ADDI : PPC::ADDI8; break; case PPC::ADDC: @@ -2630,6 +2703,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 0; III.IsCommutative = true; + III.IsSummingOperands = true; III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8; break; case PPC::ADDCo: @@ -2637,6 +2711,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 0; III.IsCommutative = true; + III.IsSummingOperands = true; III.ImmOpcode = PPC::ADDICo; break; case PPC::SUBFC: @@ -2809,8 +2884,9 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, III.ZeroIsSpecialOrig = 1; III.ZeroIsSpecialNew = 2; III.IsCommutative = true; + III.IsSummingOperands = true; III.ImmOpNo = 1; - III.ConstantOpNo = 2; + III.OpNoForForwarding = 2; switch(Opc) { default: llvm_unreachable("Unknown opcode"); case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break; @@ -2866,8 +2942,9 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, III.ZeroIsSpecialOrig = 2; III.ZeroIsSpecialNew = 3; III.IsCommutative = false; + III.IsSummingOperands = true; III.ImmOpNo = 2; - III.ConstantOpNo = 3; + III.OpNoForForwarding = 3; switch(Opc) { default: llvm_unreachable("Unknown opcode"); case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break; @@ -2898,21 +2975,30 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break; } break; - // Power9 only. + // Power9 and up only. For some of these, the X-Form version has access to all + // 64 VSR's whereas the D-Form only has access to the VR's. We replace those + // with pseudo-ops pre-ra and for post-ra, we check that the register loaded + // into or stored from is one of the VR registers. 
case PPC::LXVX: case PPC::LXSSPX: case PPC::LXSDX: case PPC::STXVX: case PPC::STXSSPX: case PPC::STXSDX: + case PPC::XFLOADf32: + case PPC::XFLOADf64: + case PPC::XFSTOREf32: + case PPC::XFSTOREf64: if (!Subtarget.hasP9Vector()) return false; III.SignedImm = true; III.ZeroIsSpecialOrig = 1; III.ZeroIsSpecialNew = 2; III.IsCommutative = true; + III.IsSummingOperands = true; III.ImmOpNo = 1; - III.ConstantOpNo = 2; + III.OpNoForForwarding = 2; + III.ImmMustBeMultipleOf = 4; switch(Opc) { default: llvm_unreachable("Unknown opcode"); case PPC::LXVX: @@ -2920,24 +3006,64 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, III.ImmMustBeMultipleOf = 16; break; case PPC::LXSSPX: - III.ImmOpcode = PPC::LXSSP; - III.ImmMustBeMultipleOf = 4; + if (PostRA) { + if (isVFReg(MI.getOperand(0).getReg())) + III.ImmOpcode = PPC::LXSSP; + else { + III.ImmOpcode = PPC::LFS; + III.ImmMustBeMultipleOf = 1; + } + break; + } + LLVM_FALLTHROUGH; + case PPC::XFLOADf32: + III.ImmOpcode = PPC::DFLOADf32; break; case PPC::LXSDX: - III.ImmOpcode = PPC::LXSD; - III.ImmMustBeMultipleOf = 4; + if (PostRA) { + if (isVFReg(MI.getOperand(0).getReg())) + III.ImmOpcode = PPC::LXSD; + else { + III.ImmOpcode = PPC::LFD; + III.ImmMustBeMultipleOf = 1; + } + break; + } + LLVM_FALLTHROUGH; + case PPC::XFLOADf64: + III.ImmOpcode = PPC::DFLOADf64; break; case PPC::STXVX: III.ImmOpcode = PPC::STXV; III.ImmMustBeMultipleOf = 16; break; case PPC::STXSSPX: - III.ImmOpcode = PPC::STXSSP; - III.ImmMustBeMultipleOf = 4; + if (PostRA) { + if (isVFReg(MI.getOperand(0).getReg())) + III.ImmOpcode = PPC::STXSSP; + else { + III.ImmOpcode = PPC::STFS; + III.ImmMustBeMultipleOf = 1; + } + break; + } + LLVM_FALLTHROUGH; + case PPC::XFSTOREf32: + III.ImmOpcode = PPC::DFSTOREf32; break; case PPC::STXSDX: - III.ImmOpcode = PPC::STXSD; - III.ImmMustBeMultipleOf = 4; + if (PostRA) { + if (isVFReg(MI.getOperand(0).getReg())) + III.ImmOpcode = PPC::STXSD; + else { + III.ImmOpcode = PPC::STFD; + III.ImmMustBeMultipleOf = 1; 
+ } + break; + } + LLVM_FALLTHROUGH; + case PPC::XFSTOREf64: + III.ImmOpcode = PPC::DFSTOREf64; break; } break; @@ -2984,13 +3110,264 @@ static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) { } } -bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III, - unsigned ConstantOpNo, - int64_t Imm) const { +// Check if the 'MI' that has the index OpNoForForwarding +// meets the requirement described in the ImmInstrInfo. +bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI, + const ImmInstrInfo &III, + unsigned OpNoForForwarding + ) const { + // As the algorithm of checking for PPC::ZERO/PPC::ZERO8 + // would not work pre-RA, we can only do the check post RA. + MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + if (MRI.isSSA()) + return false; + + // Cannot do the transform if MI isn't summing the operands. + if (!III.IsSummingOperands) + return false; + + // The instruction we are trying to replace must have the ZeroIsSpecialOrig set. + if (!III.ZeroIsSpecialOrig) + return false; + + // We cannot do the transform if the operand we are trying to replace + // isn't the same as the operand the instruction allows. + if (OpNoForForwarding != III.OpNoForForwarding) + return false; + + // Check if the instruction we are trying to transform really has + // the special zero register as its operand. + if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO && + MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8) + return false; + + // This machine instruction is convertible if it is, + // 1. summing the operands. + // 2. one of the operands is special zero register. + // 3. the operand we are trying to replace is allowed by the MI. + return true; +} + +// Check if the DefMI is the add inst and set the ImmMO and RegMO +// accordingly. 
+bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI, + const ImmInstrInfo &III, + MachineOperand *&ImmMO, + MachineOperand *&RegMO) const { + unsigned Opc = DefMI.getOpcode(); + if (Opc != PPC::ADDItocL && Opc != PPC::ADDI && Opc != PPC::ADDI8) + return false; + + assert(DefMI.getNumOperands() >= 3 && + "Add inst must have at least three operands"); + RegMO = &DefMI.getOperand(1); + ImmMO = &DefMI.getOperand(2); + + // This DefMI is elgible for forwarding if it is: + // 1. add inst + // 2. one of the operands is Imm/CPI/Global. + return isAnImmediateOperand(*ImmMO); +} + +bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO, + const MachineInstr &DefMI, + const MachineInstr &MI, + bool KillDefMI + ) const { + // x = addi y, imm + // ... + // z = lfdx 0, x -> z = lfd imm(y) + // The Reg "y" can be forwarded to the MI(z) only when there is no DEF + // of "y" between the DEF of "x" and "z". + // The query is only valid post RA. + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + if (MRI.isSSA()) + return false; + + // MachineInstr::readsRegister only returns true if the machine + // instruction reads the exact register or its super-register. It + // does not consider uses of sub-registers which seems like strange + // behaviour. Nonetheless, if we end up with a 64-bit register here, + // get the corresponding 32-bit register to check. + unsigned Reg = RegMO.getReg(); + if (PPC::G8RCRegClass.contains(Reg)) + Reg = Reg - PPC::X0 + PPC::R0; + + // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg. + MachineBasicBlock::const_reverse_iterator It = MI; + MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend(); + It++; + for (; It != E; ++It) { + if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI) + return false; + // Made it to DefMI without encountering a clobber. 
+ if ((&*It) == &DefMI) + break; + } + assert((&*It) == &DefMI && "DefMI is missing"); + + // If DefMI also uses the register to be forwarded, we can only forward it + // if DefMI is being erased. + if (DefMI.readsRegister(Reg, &getRegisterInfo())) + return KillDefMI; + + return true; +} + +bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO, + const MachineInstr &DefMI, + const ImmInstrInfo &III, + int64_t &Imm) const { + assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate"); + if (DefMI.getOpcode() == PPC::ADDItocL) { + // The operand for ADDItocL is CPI, which isn't imm at compiling time, + // However, we know that, it is 16-bit width, and has the alignment of 4. + // Check if the instruction met the requirement. + if (III.ImmMustBeMultipleOf > 4 || + III.TruncateImmTo || III.ImmWidth != 16) + return false; + + // Going from XForm to DForm loads means that the displacement needs to be + // not just an immediate but also a multiple of 4, or 16 depending on the + // load. A DForm load cannot be represented if it is a multiple of say 2. + // XForm loads do not have this restriction. + if (ImmMO.isGlobal() && + ImmMO.getGlobal()->getAlignment() < III.ImmMustBeMultipleOf) + return false; + + return true; + } + + if (ImmMO.isImm()) { + // It is Imm, we need to check if the Imm fit the range. + int64_t Immediate = ImmMO.getImm(); + // Sign-extend to 64-bits. + Imm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ? 
+ (Immediate | 0xFFFFFFFFFFFF0000) : Immediate; + + if (Imm % III.ImmMustBeMultipleOf) + return false; + if (III.TruncateImmTo) + Imm &= ((1 << III.TruncateImmTo) - 1); + if (III.SignedImm) { + APInt ActualValue(64, Imm, true); + if (!ActualValue.isSignedIntN(III.ImmWidth)) + return false; + } else { + uint64_t UnsignedMax = (1 << III.ImmWidth) - 1; + if ((uint64_t)Imm > UnsignedMax) + return false; + } + } + else + return false; + + // This ImmMO is forwarded if it meets the requriement describle + // in ImmInstrInfo + return true; +} + +// If an X-Form instruction is fed by an add-immediate and one of its operands +// is the literal zero, attempt to forward the source of the add-immediate to +// the corresponding D-Form instruction with the displacement coming from +// the immediate being added. +bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI, + const ImmInstrInfo &III, + unsigned OpNoForForwarding, + MachineInstr &DefMI, + bool KillDefMI) const { + // RegMO ImmMO + // | | + // x = addi reg, imm <----- DefMI + // y = op 0 , x <----- MI + // | + // OpNoForForwarding + // Check if the MI meet the requirement described in the III. + if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding)) + return false; + + // Check if the DefMI meet the requirement + // described in the III. If yes, set the ImmMO and RegMO accordingly. + MachineOperand *ImmMO = nullptr; + MachineOperand *RegMO = nullptr; + if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO)) + return false; + assert(ImmMO && RegMO && "Imm and Reg operand must have been set"); + + // As we get the Imm operand now, we need to check if the ImmMO meet + // the requirement described in the III. If yes set the Imm. + int64_t Imm = 0; + if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm)) + return false; + + // Check if the RegMO can be forwarded to MI. 
+ if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI)) + return false; + + // We know that, the MI and DefMI both meet the pattern, and + // the Imm also meet the requirement with the new Imm-form. + // It is safe to do the transformation now. + LLVM_DEBUG(dbgs() << "Replacing instruction:\n"); + LLVM_DEBUG(MI.dump()); + LLVM_DEBUG(dbgs() << "Fed by:\n"); + LLVM_DEBUG(DefMI.dump()); + + // Update the base reg first. + MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(), + false, false, + RegMO->isKill()); + + // Then, update the imm. + if (ImmMO->isImm()) { + // If the ImmMO is Imm, change the operand that has ZERO to that Imm + // directly. + replaceInstrOperandWithImm(MI, III.ZeroIsSpecialOrig, Imm); + } + else { + // Otherwise, it is Constant Pool Index(CPI) or Global, + // which is relocation in fact. We need to replace the special zero + // register with ImmMO. + // Before that, we need to fixup the target flags for imm. + // For some reason, we miss to set the flag for the ImmMO if it is CPI. + if (DefMI.getOpcode() == PPC::ADDItocL) + ImmMO->setTargetFlags(PPCII::MO_TOC_LO); + + // MI didn't have the interface such as MI.setOperand(i) though + // it has MI.getOperand(i). To repalce the ZERO MachineOperand with + // ImmMO, we need to remove ZERO operand and all the operands behind it, + // and, add the ImmMO, then, move back all the operands behind ZERO. + SmallVector<MachineOperand, 2> MOps; + for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) { + MOps.push_back(MI.getOperand(i)); + MI.RemoveOperand(i); + } + + // Remove the last MO in the list, which is ZERO operand in fact. + MOps.pop_back(); + // Add the imm operand. + MI.addOperand(*ImmMO); + // Now add the rest back. + for (auto &MO : MOps) + MI.addOperand(MO); + } + + // Update the opcode. 
+ MI.setDesc(get(III.ImmOpcode)); + + LLVM_DEBUG(dbgs() << "With:\n"); + LLVM_DEBUG(MI.dump()); + + return true; +} + +bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, + const ImmInstrInfo &III, + unsigned ConstantOpNo, + int64_t Imm) const { MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); bool PostRA = !MRI.isSSA(); // Exit early if we can't convert this. - if ((ConstantOpNo != III.ConstantOpNo) && !III.IsCommutative) + if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative) return false; if (Imm % III.ImmMustBeMultipleOf) return false; @@ -3035,7 +3412,7 @@ bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III, Opc == PPC::SRW || Opc == PPC::SRWo || Opc == PPC::SRD || Opc == PPC::SRDo; MI.setDesc(get(III.ImmOpcode)); - if (ConstantOpNo == III.ConstantOpNo) { + if (ConstantOpNo == III.OpNoForForwarding) { // Converting shifts to immediate form is a bit tricky since they may do // one of three things: // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero @@ -3063,42 +3440,47 @@ bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III, uint64_t SH = RightShift ? 32 - ShAmt : ShAmt; uint64_t MB = RightShift ? ShAmt : 0; uint64_t ME = RightShift ? 31 : 31 - ShAmt; - MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH); + replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH); MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB) .addImm(ME); } else { // Left shifts use (N, 63-N), right shifts use (64-N, N). uint64_t SH = RightShift ? 64 - ShAmt : ShAmt; uint64_t ME = RightShift ? 
ShAmt : 63 - ShAmt; - MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH); + replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH); MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME); } } } else - MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm); + replaceInstrOperandWithImm(MI, ConstantOpNo, Imm); } // Convert commutative instructions (switch the operands and convert the // desired one to an immediate. else if (III.IsCommutative) { - MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm); - swapMIOperands(MI, ConstantOpNo, III.ConstantOpNo); + replaceInstrOperandWithImm(MI, ConstantOpNo, Imm); + swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding); } else llvm_unreachable("Should have exited early!"); // For instructions for which the constant register replaces a different // operand than where the immediate goes, we need to swap them. - if (III.ConstantOpNo != III.ImmOpNo) - swapMIOperands(MI, III.ConstantOpNo, III.ImmOpNo); + if (III.OpNoForForwarding != III.ImmOpNo) + swapMIOperands(MI, III.OpNoForForwarding, III.ImmOpNo); - // If the R0/X0 register is special for the original instruction and not for - // the new instruction (or vice versa), we need to fix up the register class. + // If the special R0/X0 register index are different for original instruction + // and new instruction, we need to fix up the register class in new + // instruction. if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) { - if (!III.ZeroIsSpecialOrig) { + if (III.ZeroIsSpecialNew) { + // If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no + // need to fix up register class. unsigned RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg(); - const TargetRegisterClass *NewRC = - MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ? 
- &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass; - MRI.setRegClass(RegToModify, NewRC); + if (TargetRegisterInfo::isVirtualRegister(RegToModify)) { + const TargetRegisterClass *NewRC = + MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ? + &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass; + MRI.setRegClass(RegToModify, NewRC); + } } } return true; @@ -3140,6 +3522,7 @@ static bool isSignExtendingOp(const MachineInstr &MI) { Opcode == PPC::EXTSH || Opcode == PPC::EXTSHo || Opcode == PPC::EXTSB8 || Opcode == PPC::EXTSH8 || Opcode == PPC::EXTSW || Opcode == PPC::EXTSWo || + Opcode == PPC::SETB || Opcode == PPC::SETB8 || Opcode == PPC::EXTSH8_32_64 || Opcode == PPC::EXTSW_32_64 || Opcode == PPC::EXTSB8_32_64) return true; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h index ba82f56a2464..7ed558b835af 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -91,8 +91,8 @@ struct ImmInstrInfo { uint64_t ZeroIsSpecialNew : 3; // Is the operation commutative? uint64_t IsCommutative : 1; - // The operand number to check for load immediate. - uint64_t ConstantOpNo : 3; + // The operand number to check for add-immediate def. + uint64_t OpNoForForwarding : 3; // The operand number for the immediate. uint64_t ImmOpNo : 3; // The opcode of the new instruction. @@ -101,6 +101,8 @@ struct ImmInstrInfo { uint64_t ImmWidth : 5; // The immediate should be truncated to N bits. 
uint64_t TruncateImmTo : 5; + // Is the instruction summing the operand + uint64_t IsSummingOperands : 1; }; // Information required to convert an instruction to just a materialized @@ -123,10 +125,42 @@ class PPCInstrInfo : public PPCGenInstrInfo { unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr *> &NewMIs) const; - bool transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III, - unsigned ConstantOpNo, int64_t Imm) const; - MachineInstr *getConstantDefMI(MachineInstr &MI, unsigned &ConstOp, - bool &SeenIntermediateUse) const; + + // If the inst has imm-form and one of its operand is produced by a LI, + // put the imm into the inst directly and remove the LI if possible. + bool transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III, + unsigned ConstantOpNo, int64_t Imm) const; + // If the inst has imm-form and one of its operand is produced by an + // add-immediate, try to transform it when possible. + bool transformToImmFormFedByAdd(MachineInstr &MI, const ImmInstrInfo &III, + unsigned ConstantOpNo, + MachineInstr &DefMI, + bool KillDefMI) const; + // Try to find that, if the instruction 'MI' contains any operand that + // could be forwarded from some inst that feeds it. If yes, return the + // Def of that operand. And OpNoForForwarding is the operand index in + // the 'MI' for that 'Def'. If we see another use of this Def between + // the Def and the MI, SeenIntermediateUse becomes 'true'. + MachineInstr *getForwardingDefMI(MachineInstr &MI, + unsigned &OpNoForForwarding, + bool &SeenIntermediateUse) const; + + // Can the user MI have it's source at index \p OpNoForForwarding + // forwarded from an add-immediate that feeds it? 
+ bool isUseMIElgibleForForwarding(MachineInstr &MI, const ImmInstrInfo &III, + unsigned OpNoForForwarding) const; + bool isDefMIElgibleForForwarding(MachineInstr &DefMI, + const ImmInstrInfo &III, + MachineOperand *&ImmMO, + MachineOperand *&RegMO) const; + bool isImmElgibleForForwarding(const MachineOperand &ImmMO, + const MachineInstr &DefMI, + const ImmInstrInfo &III, + int64_t &Imm) const; + bool isRegElgibleForForwarding(const MachineOperand &RegMO, + const MachineInstr &DefMI, + const MachineInstr &MI, + bool KillDefMI) const; const unsigned *getStoreOpcodesForSpillArray() const; const unsigned *getLoadOpcodesForSpillArray() const; virtual void anchor(); @@ -158,6 +192,16 @@ public: bool isXFormMemOp(unsigned Opcode) const { return get(Opcode).TSFlags & PPCII::XFormMemOp; } + static bool isSameClassPhysRegCopy(unsigned Opcode) { + unsigned CopyOpcodes[] = + { PPC::OR, PPC::OR8, PPC::FMR, PPC::VOR, PPC::XXLOR, PPC::XXLORf, + PPC::XSCPSGNDP, PPC::MCRF, PPC::QVFMR, PPC::QVFMRs, PPC::QVFMRb, + PPC::CROR, PPC::EVOR, -1U }; + for (int i = 0; CopyOpcodes[i] != -1U; i++) + if (Opcode == CopyOpcodes[i]) + return true; + return false; + } ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, @@ -369,8 +413,30 @@ public: bool convertToImmediateForm(MachineInstr &MI, MachineInstr **KilledDef = nullptr) const; void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const; - - bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III) const; + void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo, + int64_t Imm) const; + + bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III, + bool PostRA) const; + + /// getRegNumForOperand - some operands use different numbering schemes + /// for the same registers. For example, a VSX instruction may have any of + /// vs0-vs63 allocated whereas an Altivec instruction could only have + /// vs32-vs63 allocated (numbered as v0-v31). 
This function returns the actual + /// register number needed for the opcode/operand number combination. + /// The operand number argument will be useful when we need to extend this + /// to instructions that use both Altivec and VSX numbering (for different + /// operands). + static unsigned getRegNumForOperand(const MCInstrDesc &Desc, unsigned Reg, + unsigned OpNo) { + if (Desc.TSFlags & PPCII::UseVSXReg) { + if (isVRRegister(Reg)) + Reg = PPC::VSX32 + (Reg - PPC::V0); + else if (isVFRegister(Reg)) + Reg = PPC::VSX32 + (Reg - PPC::VF0); + } + return Reg; + } }; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 1a43037e4a4b..dd3f1ac79089 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -114,6 +114,10 @@ def SDT_PPCqvlfsb : SDTypeProfile<1, 1, [ SDTCisVec<0>, SDTCisPtrTy<1> ]>; +def SDT_PPCextswsli : SDTypeProfile<1, 2, [ // extswsli + SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisInt<2> +]>; + //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes. // @@ -218,6 +222,8 @@ def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>; def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>; def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>; +def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>; + // Move 2 i64 values into a VSX register def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128", SDTypeProfile<1, 2, @@ -1189,77 +1195,76 @@ multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, //===----------------------------------------------------------------------===// // PowerPC Instruction Definitions. 
-// Pseudo-instructions: +// Pseudo instructions: let hasCtrlDep = 1 in { let Defs = [R1], Uses = [R1] in { -def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), +def ADJCALLSTACKDOWN : PPCEmitTimePseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCALLSTACKDOWN $amt1 $amt2", [(callseq_start timm:$amt1, timm:$amt2)]>; -def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), +def ADJCALLSTACKUP : PPCEmitTimePseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCALLSTACKUP $amt1 $amt2", [(callseq_end timm:$amt1, timm:$amt2)]>; } -def UPDATE_VRSAVE : Pseudo<(outs gprc:$rD), (ins gprc:$rS), +def UPDATE_VRSAVE : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$rS), "UPDATE_VRSAVE $rD, $rS", []>; } let Defs = [R1], Uses = [R1] in -def DYNALLOC : Pseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC", +def DYNALLOC : PPCEmitTimePseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC", [(set i32:$result, (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>; -def DYNAREAOFFSET : Pseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET", +def DYNAREAOFFSET : PPCEmitTimePseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET", [(set i32:$result, (PPCdynareaoffset iaddr:$fpsi))]>; // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. -let usesCustomInserter = 1, // Expanded after instruction selection. - PPC970_Single = 1 in { +let PPC970_Single = 1 in { // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes // because either operand might become the first operand in an isel, and // that operand cannot be r0. 
- def SELECT_CC_I4 : Pseudo<(outs gprc:$dst), (ins crrc:$cond, + def SELECT_CC_I4 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins crrc:$cond, gprc_nor0:$T, gprc_nor0:$F, i32imm:$BROPC), "#SELECT_CC_I4", []>; - def SELECT_CC_I8 : Pseudo<(outs g8rc:$dst), (ins crrc:$cond, + def SELECT_CC_I8 : PPCCustomInserterPseudo<(outs g8rc:$dst), (ins crrc:$cond, g8rc_nox0:$T, g8rc_nox0:$F, i32imm:$BROPC), "#SELECT_CC_I8", []>; - def SELECT_CC_F4 : Pseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F, + def SELECT_CC_F4 : PPCCustomInserterPseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F, i32imm:$BROPC), "#SELECT_CC_F4", []>; - def SELECT_CC_F8 : Pseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F, + def SELECT_CC_F8 : PPCCustomInserterPseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F, i32imm:$BROPC), "#SELECT_CC_F8", []>; - def SELECT_CC_F16 : Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F, + def SELECT_CC_F16 : PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F, i32imm:$BROPC), "#SELECT_CC_F16", []>; - def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F, + def SELECT_CC_VRRC: PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F, i32imm:$BROPC), "#SELECT_CC_VRRC", []>; // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition // register bit directly. 
- def SELECT_I4 : Pseudo<(outs gprc:$dst), (ins crbitrc:$cond, + def SELECT_I4 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins crbitrc:$cond, gprc_nor0:$T, gprc_nor0:$F), "#SELECT_I4", [(set i32:$dst, (select i1:$cond, i32:$T, i32:$F))]>; - def SELECT_I8 : Pseudo<(outs g8rc:$dst), (ins crbitrc:$cond, + def SELECT_I8 : PPCCustomInserterPseudo<(outs g8rc:$dst), (ins crbitrc:$cond, g8rc_nox0:$T, g8rc_nox0:$F), "#SELECT_I8", [(set i64:$dst, (select i1:$cond, i64:$T, i64:$F))]>; let Predicates = [HasFPU] in { - def SELECT_F4 : Pseudo<(outs f4rc:$dst), (ins crbitrc:$cond, + def SELECT_F4 : PPCCustomInserterPseudo<(outs f4rc:$dst), (ins crbitrc:$cond, f4rc:$T, f4rc:$F), "#SELECT_F4", [(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>; - def SELECT_F8 : Pseudo<(outs f8rc:$dst), (ins crbitrc:$cond, + def SELECT_F8 : PPCCustomInserterPseudo<(outs f8rc:$dst), (ins crbitrc:$cond, f8rc:$T, f8rc:$F), "#SELECT_F8", [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>; - def SELECT_F16 : Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond, + def SELECT_F16 : PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crbitrc:$cond, vrrc:$T, vrrc:$F), "#SELECT_F16", [(set f128:$dst, (select i1:$cond, f128:$T, f128:$F))]>; } - def SELECT_VRRC: Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond, + def SELECT_VRRC: PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crbitrc:$cond, vrrc:$T, vrrc:$F), "#SELECT_VRRC", [(set v4i32:$dst, (select i1:$cond, v4i32:$T, v4i32:$F))]>; @@ -1268,18 +1273,18 @@ let Predicates = [HasFPU] in { // SPILL_CR - Indicate that we're dumping the CR register, so we'll need to // scavenge a register for it. 
let mayStore = 1 in { -def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F), +def SPILL_CR : PPCEmitTimePseudo<(outs), (ins crrc:$cond, memri:$F), "#SPILL_CR", []>; -def SPILL_CRBIT : Pseudo<(outs), (ins crbitrc:$cond, memri:$F), +def SPILL_CRBIT : PPCEmitTimePseudo<(outs), (ins crbitrc:$cond, memri:$F), "#SPILL_CRBIT", []>; } // RESTORE_CR - Indicate that we're restoring the CR register (previously // spilled), so we'll need to scavenge a register for it. let mayLoad = 1 in { -def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F), +def RESTORE_CR : PPCEmitTimePseudo<(outs crrc:$cond), (ins memri:$F), "#RESTORE_CR", []>; -def RESTORE_CRBIT : Pseudo<(outs crbitrc:$cond), (ins memri:$F), +def RESTORE_CRBIT : PPCEmitTimePseudo<(outs crbitrc:$cond), (ins memri:$F), "#RESTORE_CRBIT", []>; } @@ -1305,10 +1310,10 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { } let Defs = [LR] in - def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>, + def MovePCtoLR : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR", []>, PPC970_Unit_BRU; let Defs = [LR] in - def MoveGOTtoLR : Pseudo<(outs), (ins), "#MoveGOTtoLR", []>, + def MoveGOTtoLR : PPCEmitTimePseudo<(outs), (ins), "#MoveGOTtoLR", []>, PPC970_Unit_BRU; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { @@ -1506,19 +1511,19 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in -def TCRETURNdi :Pseudo< (outs), +def TCRETURNdi :PPCEmitTimePseudo< (outs), (ins calltarget:$dst, i32imm:$offset), "#TC_RETURNd $dst $offset", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in -def TCRETURNai :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset), +def TCRETURNai :PPCEmitTimePseudo<(outs), (ins abscalltarget:$func, i32imm:$offset), "#TC_RETURNa $func $offset", [(PPCtc_return (i32 imm:$func), imm:$offset)]>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] 
in -def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset), +def TCRETURNri : PPCEmitTimePseudo<(outs), (ins CTRRC:$dst, i32imm:$offset), "#TC_RETURNr $dst $offset", []>; @@ -1544,14 +1549,19 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst), } -let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { +// While longjmp is a control-flow barrier (fallthrough isn't allowed), setjmp +// is not. +let hasSideEffects = 1 in { let Defs = [CTR] in - def EH_SjLj_SetJmp32 : Pseudo<(outs gprc:$dst), (ins memr:$buf), + def EH_SjLj_SetJmp32 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf), "#EH_SJLJ_SETJMP32", [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, Requires<[In32BitMode]>; +} + +let hasSideEffects = 1, isBarrier = 1 in { let isTerminator = 1 in - def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf), + def EH_SjLj_LongJmp32 : PPCCustomInserterPseudo<(outs), (ins memr:$buf), "#EH_SJLJ_LONGJMP32", [(PPCeh_sjlj_longjmp addr:$buf)]>, Requires<[In32BitMode]>; @@ -1561,7 +1571,7 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { // a terminator. Size is set to 0 to prevent the builtin assembler // from emitting it. let isBranch = 1, isTerminator = 1, Size = 0 in { - def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst), + def EH_SjLj_Setup : PPCEmitTimePseudo<(outs), (ins directbrtarget:$dst), "#EH_SjLj_Setup\t$dst", []>; } @@ -1648,119 +1658,117 @@ def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)), // clean this up in PPCMIPeephole with calls to // PPCInstrInfo::convertToImmediateForm() but we should probably not emit them // in the first place. 
-let usesCustomInserter = 1 in { - let Defs = [CR0] in { - def ATOMIC_LOAD_ADD_I8 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8", - [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_SUB_I8 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8", - [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_AND_I8 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8", - [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_OR_I8 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8", - [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_XOR_I8 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "ATOMIC_LOAD_XOR_I8", - [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_NAND_I8 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8", - [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_MIN_I8 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I8", - [(set i32:$dst, (atomic_load_min_8 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_MAX_I8 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I8", - [(set i32:$dst, (atomic_load_max_8 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_UMIN_I8 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I8", - [(set i32:$dst, (atomic_load_umin_8 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_UMAX_I8 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I8", - [(set i32:$dst, (atomic_load_umax_8 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_ADD_I16 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16", - [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_SUB_I16 : 
Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16", - [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_AND_I16 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16", - [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_OR_I16 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16", - [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_XOR_I16 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16", - [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_NAND_I16 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16", - [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_MIN_I16 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I16", - [(set i32:$dst, (atomic_load_min_16 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_MAX_I16 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I16", - [(set i32:$dst, (atomic_load_max_16 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_UMIN_I16 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I16", - [(set i32:$dst, (atomic_load_umin_16 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_UMAX_I16 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I16", - [(set i32:$dst, (atomic_load_umax_16 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_ADD_I32 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32", - [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_SUB_I32 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32", - [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_AND_I32 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), 
"#ATOMIC_LOAD_AND_I32", - [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_OR_I32 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32", - [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_XOR_I32 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32", - [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_NAND_I32 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32", - [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_MIN_I32 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I32", - [(set i32:$dst, (atomic_load_min_32 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_MAX_I32 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I32", - [(set i32:$dst, (atomic_load_max_32 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_UMIN_I32 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I32", - [(set i32:$dst, (atomic_load_umin_32 xoaddr:$ptr, i32:$incr))]>; - def ATOMIC_LOAD_UMAX_I32 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I32", - [(set i32:$dst, (atomic_load_umax_32 xoaddr:$ptr, i32:$incr))]>; - - def ATOMIC_CMP_SWAP_I8 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8", - [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>; - def ATOMIC_CMP_SWAP_I16 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new", - [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>; - def ATOMIC_CMP_SWAP_I32 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new", - [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>; - - def ATOMIC_SWAP_I8 : Pseudo< - (outs 
gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_i8", - [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>; - def ATOMIC_SWAP_I16 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16", - [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>; - def ATOMIC_SWAP_I32 : Pseudo< - (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32", - [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>; - } +let Defs = [CR0] in { + def ATOMIC_LOAD_ADD_I8 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8", + [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_SUB_I8 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8", + [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_AND_I8 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8", + [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_OR_I8 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8", + [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_XOR_I8 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "ATOMIC_LOAD_XOR_I8", + [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_NAND_I8 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8", + [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MIN_I8 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I8", + [(set i32:$dst, (atomic_load_min_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MAX_I8 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I8", + [(set i32:$dst, (atomic_load_max_8 xoaddr:$ptr, i32:$incr))]>; + def 
ATOMIC_LOAD_UMIN_I8 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I8", + [(set i32:$dst, (atomic_load_umin_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_UMAX_I8 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I8", + [(set i32:$dst, (atomic_load_umax_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_ADD_I16 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16", + [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_SUB_I16 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16", + [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_AND_I16 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16", + [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_OR_I16 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16", + [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_XOR_I16 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16", + [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_NAND_I16 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16", + [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MIN_I16 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I16", + [(set i32:$dst, (atomic_load_min_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MAX_I16 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I16", + [(set i32:$dst, (atomic_load_max_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_UMIN_I16 : PPCCustomInserterPseudo< + (outs gprc:$dst), 
(ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I16", + [(set i32:$dst, (atomic_load_umin_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_UMAX_I16 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I16", + [(set i32:$dst, (atomic_load_umax_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_ADD_I32 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32", + [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_SUB_I32 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32", + [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_AND_I32 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32", + [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_OR_I32 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32", + [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_XOR_I32 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32", + [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_NAND_I32 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32", + [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MIN_I32 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I32", + [(set i32:$dst, (atomic_load_min_32 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MAX_I32 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I32", + [(set i32:$dst, (atomic_load_max_32 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_UMIN_I32 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I32", + [(set 
i32:$dst, (atomic_load_umin_32 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_UMAX_I32 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I32", + [(set i32:$dst, (atomic_load_umax_32 xoaddr:$ptr, i32:$incr))]>; + + def ATOMIC_CMP_SWAP_I8 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8", + [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>; + def ATOMIC_CMP_SWAP_I16 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new", + [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>; + def ATOMIC_CMP_SWAP_I32 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new", + [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>; + + def ATOMIC_SWAP_I8 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_i8", + [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>; + def ATOMIC_SWAP_I16 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16", + [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>; + def ATOMIC_SWAP_I32 : PPCCustomInserterPseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32", + [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>; } def : Pat<(PPCatomicCmpSwap_8 xoaddr:$ptr, i32:$old, i32:$new), @@ -1988,15 +1996,15 @@ def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src), // Unindexed (r+i) Stores. 
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { -def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src), - "stb $rS, $src", IIC_LdStStore, - [(truncstorei8 i32:$rS, iaddr:$src)]>; -def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$src), - "sth $rS, $src", IIC_LdStStore, - [(truncstorei16 i32:$rS, iaddr:$src)]>; -def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src), - "stw $rS, $src", IIC_LdStStore, - [(store i32:$rS, iaddr:$src)]>; +def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$dst), + "stb $rS, $dst", IIC_LdStStore, + [(truncstorei8 i32:$rS, iaddr:$dst)]>; +def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$dst), + "sth $rS, $dst", IIC_LdStStore, + [(truncstorei16 i32:$rS, iaddr:$dst)]>; +def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$dst), + "stw $rS, $dst", IIC_LdStStore, + [(store i32:$rS, iaddr:$dst)]>; let Predicates = [HasFPU] in { def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst), "stfs $rS, $dst", IIC_LdStSTFD, @@ -2010,13 +2018,13 @@ def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst), // Unindexed (r+i) Stores with Update (preinc). 
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), - "stbu $rS, $dst", IIC_LdStStoreUpd, []>, + "stbu $rS, $dst", IIC_LdStSTU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), - "sthu $rS, $dst", IIC_LdStStoreUpd, []>, + "sthu $rS, $dst", IIC_LdStSTU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), - "stwu $rS, $dst", IIC_LdStStoreUpd, []>, + "stwu $rS, $dst", IIC_LdStSTU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; let Predicates = [HasFPU] in { def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst), @@ -2084,19 +2092,19 @@ def STFDX : XForm_28_memOp<31, 727, (outs), (ins f8rc:$frS, memrr:$dst), let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { def STBUX : XForm_8_memOp<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), - "stbux $rS, $dst", IIC_LdStStoreUpd, []>, + "stbux $rS, $dst", IIC_LdStSTUX, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STHUX : XForm_8_memOp<31, 439, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), - "sthux $rS, $dst", IIC_LdStStoreUpd, []>, + "sthux $rS, $dst", IIC_LdStSTUX, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STWUX : XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), - "stwux $rS, $dst", IIC_LdStStoreUpd, []>, + "stwux $rS, $dst", IIC_LdStSTUX, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; @@ -2543,8 +2551,8 @@ def MTPMR : XFXForm_1<31, 462, (outs), (ins i32imm:$SPR, gprc:$RT), // A pseudo-instruction used to implement the read of the 64-bit cycle counter // on a 32-bit target. 
-let hasSideEffects = 1, usesCustomInserter = 1 in -def ReadTB : Pseudo<(outs gprc:$lo, gprc:$hi), (ins), +let hasSideEffects = 1 in +def ReadTB : PPCCustomInserterPseudo<(outs gprc:$lo, gprc:$hi), (ins), "#ReadTB", []>; let Uses = [CTR] in { @@ -2603,13 +2611,13 @@ def : InstAlias<"mfvrsave $rS", (MFVRSAVE gprc:$rS)>; // SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register, // so we'll need to scavenge a register for it. let mayStore = 1 in -def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F), +def SPILL_VRSAVE : PPCEmitTimePseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F), "#SPILL_VRSAVE", []>; // RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously // spilled), so we'll need to scavenge a register for it. let mayLoad = 1 in -def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), +def RESTORE_VRSAVE : PPCEmitTimePseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), "#RESTORE_VRSAVE", []>; let hasSideEffects = 0 in { @@ -2648,9 +2656,9 @@ def MCRXRX : X_BF3<31, 576, (outs crrc:$BF), (ins), } // hasSideEffects = 0 let Predicates = [HasFPU] in { -// Pseudo instruction to perform FADD in round-to-zero mode. -let usesCustomInserter = 1, Uses = [RM] in { - def FADDrtz: Pseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "", +// Custom inserter instruction to perform FADD in round-to-zero mode. +let Uses = [RM] in { + def FADDrtz: PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "", [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>; } @@ -3022,23 +3030,23 @@ def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)), (ADDIS $in, tblockaddress:$g)>; // Support for thread-local storage. -def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT", +def PPC32GOT: PPCEmitTimePseudo<(outs gprc:$rD), (ins), "#PPC32GOT", [(set i32:$rD, (PPCppc32GOT))]>; // Get the _GLOBAL_OFFSET_TABLE_ in PIC mode. 
// This uses two output registers, the first as the real output, the second as a // temporary register, used internally in code generation. -def PPC32PICGOT: Pseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT", +def PPC32PICGOT: PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT", []>, NoEncode<"$rT">; -def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg), +def LDgotTprelL32: PPCEmitTimePseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg), "#LDgotTprelL32", [(set i32:$rD, (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>; def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g), (ADD4TLS $in, tglobaltlsaddr:$g)>; -def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), +def ADDItlsgdL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDItlsgdL32", [(set i32:$rD, (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>; @@ -3046,7 +3054,7 @@ def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), // explicitly defined when this op is created, so not mentioned here. let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in -def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), +def GETtlsADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), "GETtlsADDR32", [(set i32:$rD, (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>; @@ -3054,14 +3062,14 @@ def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), // are true defines while the rest of the Defs are clobbers. 
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in -def ADDItlsgdLADDR32 : Pseudo<(outs gprc:$rD), +def ADDItlsgdLADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym), "#ADDItlsgdLADDR32", [(set i32:$rD, (PPCaddiTlsgdLAddr i32:$reg, tglobaltlsaddr:$disp, tglobaltlsaddr:$sym))]>; -def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), +def ADDItlsldL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDItlsldL32", [(set i32:$rD, (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>; @@ -3069,7 +3077,7 @@ def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), // explicitly defined when this op is created, so not mentioned here. let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in -def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), +def GETtlsldADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), "GETtlsldADDR32", [(set i32:$rD, (PPCgetTlsldAddr i32:$reg, @@ -3078,31 +3086,31 @@ def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), // are true defines while the rest of the Defs are clobbers. 
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in -def ADDItlsldLADDR32 : Pseudo<(outs gprc:$rD), +def ADDItlsldLADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym), "#ADDItlsldLADDR32", [(set i32:$rD, (PPCaddiTlsldLAddr i32:$reg, tglobaltlsaddr:$disp, tglobaltlsaddr:$sym))]>; -def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), +def ADDIdtprelL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDIdtprelL32", [(set i32:$rD, (PPCaddiDtprelL i32:$reg, tglobaltlsaddr:$disp))]>; -def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), +def ADDISdtprelHA32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDISdtprelHA32", [(set i32:$rD, (PPCaddisDtprelHA i32:$reg, tglobaltlsaddr:$disp))]>; // Support for Position-independent code -def LWZtoc : Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg), +def LWZtoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg), "#LWZtoc", [(set i32:$rD, (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>; // Get Global (GOT) Base Register offset, from the word immediately preceding // the function label. -def UpdateGBR : Pseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>; +def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>; // Standard shifts. 
These are represented separately from the real shifts above @@ -3930,21 +3938,19 @@ def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGT)), def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)), (SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>; -let usesCustomInserter = 1 in { -def ANDIo_1_EQ_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in), +def ANDIo_1_EQ_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in), "#ANDIo_1_EQ_BIT", [(set i1:$dst, (trunc (not i32:$in)))]>; -def ANDIo_1_GT_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in), +def ANDIo_1_GT_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in), "#ANDIo_1_GT_BIT", [(set i1:$dst, (trunc i32:$in))]>; -def ANDIo_1_EQ_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in), +def ANDIo_1_EQ_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in), "#ANDIo_1_EQ_BIT8", [(set i1:$dst, (trunc (not i64:$in)))]>; -def ANDIo_1_GT_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in), +def ANDIo_1_GT_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in), "#ANDIo_1_GT_BIT8", [(set i1:$dst, (trunc i64:$in))]>; -} def : Pat<(i1 (not (trunc i32:$in))), (ANDIo_1_EQ_BIT $in)>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td index c4bb02695b36..ef589ad01fd7 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td @@ -245,32 +245,30 @@ let Uses = [RM] in { // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. 
- let usesCustomInserter = 1 in { - def SELECT_CC_QFRC: Pseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F, - i32imm:$BROPC), "#SELECT_CC_QFRC", - []>; - def SELECT_CC_QSRC: Pseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F, - i32imm:$BROPC), "#SELECT_CC_QSRC", - []>; - def SELECT_CC_QBRC: Pseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F, - i32imm:$BROPC), "#SELECT_CC_QBRC", - []>; - - // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition - // register bit directly. - def SELECT_QFRC: Pseudo<(outs qfrc:$dst), (ins crbitrc:$cond, - qfrc:$T, qfrc:$F), "#SELECT_QFRC", - [(set v4f64:$dst, - (select i1:$cond, v4f64:$T, v4f64:$F))]>; - def SELECT_QSRC: Pseudo<(outs qsrc:$dst), (ins crbitrc:$cond, - qsrc:$T, qsrc:$F), "#SELECT_QSRC", - [(set v4f32:$dst, - (select i1:$cond, v4f32:$T, v4f32:$F))]>; - def SELECT_QBRC: Pseudo<(outs qbrc:$dst), (ins crbitrc:$cond, - qbrc:$T, qbrc:$F), "#SELECT_QBRC", - [(set v4i1:$dst, - (select i1:$cond, v4i1:$T, v4i1:$F))]>; - } + def SELECT_CC_QFRC: PPCCustomInserterPseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F, + i32imm:$BROPC), "#SELECT_CC_QFRC", + []>; + def SELECT_CC_QSRC: PPCCustomInserterPseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F, + i32imm:$BROPC), "#SELECT_CC_QSRC", + []>; + def SELECT_CC_QBRC: PPCCustomInserterPseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F, + i32imm:$BROPC), "#SELECT_CC_QBRC", + []>; + + // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition + // register bit directly. 
+ def SELECT_QFRC: PPCCustomInserterPseudo<(outs qfrc:$dst), (ins crbitrc:$cond, + qfrc:$T, qfrc:$F), "#SELECT_QFRC", + [(set v4f64:$dst, + (select i1:$cond, v4f64:$T, v4f64:$F))]>; + def SELECT_QSRC: PPCCustomInserterPseudo<(outs qsrc:$dst), (ins crbitrc:$cond, + qsrc:$T, qsrc:$F), "#SELECT_QSRC", + [(set v4f32:$dst, + (select i1:$cond, v4f32:$T, v4f32:$F))]>; + def SELECT_QBRC: PPCCustomInserterPseudo<(outs qbrc:$dst), (ins crbitrc:$cond, + qbrc:$T, qbrc:$F), "#SELECT_QBRC", + [(set v4i1:$dst, + (select i1:$cond, v4i1:$T, v4i1:$F))]>; // Convert and Round Instructions def QVFCTID : QPXX19_Int<4, 814, "qvfctid", int_ppc_qpx_qvfctid>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td index 96649efdc1bc..9f5891a45f22 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td @@ -831,22 +831,20 @@ def : Pat<(f64 (fpextend f32:$src)), } let Predicates = [HasSPE] in { - let usesCustomInserter = 1 in { -def SELECT_CC_SPE4 : Pseudo<(outs spe4rc:$dst), +def SELECT_CC_SPE4 : PPCCustomInserterPseudo<(outs spe4rc:$dst), (ins crrc:$cond, spe4rc:$T, spe4rc:$F, i32imm:$BROPC), "#SELECT_CC_SPE4", []>; -def SELECT_CC_SPE : Pseudo<(outs sperc:$dst), +def SELECT_CC_SPE : PPCCustomInserterPseudo<(outs sperc:$dst), (ins crrc:$cond, sperc:$T, sperc:$F, i32imm:$BROPC), "#SELECT_CC_SPE", []>; -def SELECT_SPE4 : Pseudo<(outs spe4rc:$dst), (ins crbitrc:$cond, +def SELECT_SPE4 : PPCCustomInserterPseudo<(outs spe4rc:$dst), (ins crbitrc:$cond, spe4rc:$T, spe4rc:$F), "#SELECT_SPE4", [(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>; -def SELECT_SPE : Pseudo<(outs sperc:$dst), (ins crbitrc:$cond, +def SELECT_SPE : PPCCustomInserterPseudo<(outs sperc:$dst), (ins crbitrc:$cond, sperc:$T, sperc:$F), "#SELECT_SPE", [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>; - } def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), (SELECT_SPE4 (CRANDC $lhs, $rhs), 
$tval, $fval)>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 781a3277441a..0f073388dc74 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -67,6 +67,10 @@ def SDT_PPCxxswapd : SDTypeProfile<1, 1, [ def SDTVecConv : SDTypeProfile<1, 2, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2> ]>; +def SDTVabsd : SDTypeProfile<1, 3, [ + SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32> +]>; + def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; @@ -79,6 +83,7 @@ def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>; def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>; def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>; def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>; +def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>; multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase, string asmstr, InstrItinClass itin, Intrinsic Int, @@ -132,7 +137,7 @@ let Uses = [RM] in { []>; // Pseudo instruction XFLOADf64 will be expanded to LXSDX or LFDX later - let isPseudo = 1, CodeSize = 3 in + let CodeSize = 3 in def XFLOADf64 : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), "#XFLOADf64", [(set f64:$XT, (load xoaddr:$src))]>; @@ -163,7 +168,7 @@ let Uses = [RM] in { []>; // Pseudo instruction XFSTOREf64 will be expanded to STXSDX or STFDX later - let isPseudo = 1, CodeSize = 3 in + let CodeSize = 3 in def XFSTOREf64 : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst), "#XFSTOREf64", [(store f64:$XT, xoaddr:$dst)]>; @@ -898,37 +903,36 @@ let Uses = [RM] in { // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. -let usesCustomInserter = 1, // Expanded after instruction selection. 
- PPC970_Single = 1 in { +let PPC970_Single = 1 in { - def SELECT_CC_VSRC: Pseudo<(outs vsrc:$dst), + def SELECT_CC_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst), (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC), "#SELECT_CC_VSRC", []>; - def SELECT_VSRC: Pseudo<(outs vsrc:$dst), + def SELECT_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst), (ins crbitrc:$cond, vsrc:$T, vsrc:$F), "#SELECT_VSRC", [(set v2f64:$dst, (select i1:$cond, v2f64:$T, v2f64:$F))]>; - def SELECT_CC_VSFRC: Pseudo<(outs f8rc:$dst), + def SELECT_CC_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F, i32imm:$BROPC), "#SELECT_CC_VSFRC", []>; - def SELECT_VSFRC: Pseudo<(outs f8rc:$dst), + def SELECT_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst), (ins crbitrc:$cond, f8rc:$T, f8rc:$F), "#SELECT_VSFRC", [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>; - def SELECT_CC_VSSRC: Pseudo<(outs f4rc:$dst), + def SELECT_CC_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F, i32imm:$BROPC), "#SELECT_CC_VSSRC", []>; - def SELECT_VSSRC: Pseudo<(outs f4rc:$dst), + def SELECT_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst), (ins crbitrc:$cond, f4rc:$T, f4rc:$F), "#SELECT_VSSRC", [(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>; -} // usesCustomInserter +} } // AddedComplexity def : InstAlias<"xvmovdp $XT, $XB", @@ -1040,17 +1044,14 @@ def : Pat<(v2f64 (bitconvert v1i128:$A)), def : Pat<(v1i128 (bitconvert v2f64:$A)), (COPY_TO_REGCLASS $A, VRRC)>; -// sign extension patterns -// To extend "in place" from v2i32 to v2i64, we have input data like: -// | undef | i32 | undef | i32 | -// but xvcvsxwdp expects the input in big-Endian format: -// | i32 | undef | i32 | undef | -// so we need to shift everything to the left by one i32 (word) before -// the conversion. 
-def : Pat<(sext_inreg v2i64:$C, v2i32), - (XVCVDPSXDS (XVCVSXWDP (XXSLDWI $C, $C, 1)))>; -def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))), - (XVCVSXWDP (XXSLDWI $C, $C, 1))>; +def : Pat<(v2i64 (bitconvert f128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v4i32 (bitconvert f128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v8i16 (bitconvert f128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v16i8 (bitconvert f128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)), (v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>; @@ -1069,10 +1070,6 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in { // Stores. def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst), - (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst), - (STXVW4X $rS, xoaddr:$dst)>; def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; } let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in { @@ -1159,6 +1156,26 @@ def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A), def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A), (XVRSQRTEDP $A)>; +// Vector selection +def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)), + (COPY_TO_REGCLASS + (XXSEL (COPY_TO_REGCLASS $vC, VSRC), + (COPY_TO_REGCLASS $vB, VSRC), + (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>; +def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)), + (COPY_TO_REGCLASS + (XXSEL (COPY_TO_REGCLASS $vC, VSRC), + (COPY_TO_REGCLASS $vB, VSRC), + (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>; +def : Pat<(vselect v4i32:$vA, v4i32:$vB, v4i32:$vC), + (XXSEL $vC, $vB, $vA)>; +def : Pat<(vselect v2i64:$vA, v2i64:$vB, v2i64:$vC), + (XXSEL $vC, $vB, $vA)>; +def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC), + (XXSEL $vC, $vB, $vA)>; +def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC), + (XXSEL $vC, $vB, $vA)>; + let Predicates = [IsLittleEndian] in { def : Pat<(f64 (PPCfcfid 
(PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; @@ -1200,6 +1217,27 @@ def ScalarLoads { dag Li32 = (i32 (load xoaddr:$src)); } +def DWToSPExtractConv { + dag El0US1 = (f32 (PPCfcfidus + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0)))))); + dag El1US1 = (f32 (PPCfcfidus + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1)))))); + dag El0US2 = (f32 (PPCfcfidus + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0)))))); + dag El1US2 = (f32 (PPCfcfidus + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1)))))); + dag El0SS1 = (f32 (PPCfcfids + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0)))))); + dag El1SS1 = (f32 (PPCfcfids + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1)))))); + dag El0SS2 = (f32 (PPCfcfids + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0)))))); + dag El1SS2 = (f32 (PPCfcfids + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1)))))); + dag BVU = (v4f32 (build_vector El0US1, El1US1, El0US2, El1US2)); + dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2)); +} + // The following VSX instructions were introduced in Power ISA 2.07 /* FIXME: if the operands are v2i64, these patterns will not match. we should define new patterns or otherwise match the same patterns @@ -1241,23 +1279,19 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. def LXSIWZX : XX1Form_memOp<31, 12, (outs vsfrc:$XT), (ins memrr:$src), "lxsiwzx $XT, $src", IIC_LdStLFD, []>; - // Please note let isPseudo = 1 is not part of class Pseudo<>. 
Missing it - // would cause these Pseudos are not expanded in expandPostRAPseudos() - let isPseudo = 1 in { - // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later - let CodeSize = 3 in - def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src), - "#XFLOADf32", - [(set f32:$XT, (load xoaddr:$src))]>; - // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later - def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), - "#LIWAX", - [(set f64:$XT, (PPClfiwax xoaddr:$src))]>; - // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later - def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), - "#LIWZX", - [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; - } + // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later + let CodeSize = 3 in + def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src), + "#XFLOADf32", + [(set f32:$XT, (load xoaddr:$src))]>; + // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later + def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), + "#LIWAX", + [(set f64:$XT, (PPClfiwax xoaddr:$src))]>; + // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later + def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), + "#LIWZX", + [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; } // mayLoad // VSX scalar stores introduced in ISA 2.07 @@ -1268,19 +1302,15 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. def STXSIWX : XX1Form_memOp<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst), "stxsiwx $XT, $dst", IIC_LdStSTFD, []>; - // Please note let isPseudo = 1 is not part of class Pseudo<>. 
Missing it - // would cause these Pseudos are not expanded in expandPostRAPseudos() - let isPseudo = 1 in { - // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later - let CodeSize = 3 in - def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst), - "#XFSTOREf32", - [(store f32:$XT, xoaddr:$dst)]>; - // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later - def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst), - "#STIWX", - [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; - } + // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later + let CodeSize = 3 in + def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst), + "#XFSTOREf32", + [(store f32:$XT, xoaddr:$dst)]>; + // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later + def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst), + "#STIWX", + [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; } // mayStore } // UseVSXReg = 1 @@ -1443,35 +1473,27 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
} // UseVSXReg = 1 let Predicates = [IsLittleEndian] in { - def : Pat<(f32 (PPCfcfids - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))), - (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; - def : Pat<(f32 (PPCfcfids - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), + def : Pat<DWToSPExtractConv.El0SS1, + (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>; + def : Pat<DWToSPExtractConv.El1SS1, (f32 (XSCVSXDSP (COPY_TO_REGCLASS - (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; - def : Pat<(f32 (PPCfcfidus - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))), - (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; - def : Pat<(f32 (PPCfcfidus - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), + (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>; + def : Pat<DWToSPExtractConv.El0US1, + (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>; + def : Pat<DWToSPExtractConv.El1US1, (f32 (XSCVUXDSP (COPY_TO_REGCLASS - (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; + (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>; } let Predicates = [IsBigEndian] in { - def : Pat<(f32 (PPCfcfids - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))), - (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>; - def : Pat<(f32 (PPCfcfids - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), - (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; - def : Pat<(f32 (PPCfcfidus - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))), - (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S, VSFRC)))>; - def : Pat<(f32 (PPCfcfidus - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), - (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; + def : Pat<DWToSPExtractConv.El0SS1, + (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>; + def : Pat<DWToSPExtractConv.El1SS1, + (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>; + def : Pat<DWToSPExtractConv.El0US1, + (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S1, 
VSFRC)))>; + def : Pat<DWToSPExtractConv.El1US1, + (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>; } // Instructions for converting float to i64 feeding a store. @@ -1993,6 +2015,10 @@ let Predicates = [IsLittleEndian, HasVSX] in def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>; +def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst), + (STXVD2X $rS, xoaddr:$dst)>; +def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst), + (STXVW4X $rS, xoaddr:$dst)>; def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; @@ -2671,6 +2697,9 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB), "xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>; + def : Pat<(f128 (int_ppc_scalar_insert_exp_qp f128:$vA, i64:$vB)), + (f128 (XSIEXPQP $vA, (MTVSRD $vB)))>; + // Extract Exponent/Significand DP/QP def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>; def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>; @@ -2678,6 +2707,10 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>; def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>; + def : Pat<(i64 (int_ppc_scalar_extract_expq f128:$vA)), + (i64 (MFVSRD (EXTRACT_SUBREG + (v2i64 (XSXEXPQP $vA)), sub_64)))>; + // Vector Insert Word let UseVSXReg = 1 in { // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB. 
@@ -3238,20 +3271,19 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def : Pat<(f64 (PPCVexts f64:$A, 2)), (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>; - let isPseudo = 1 in { - def DFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrix:$src), - "#DFLOADf32", - [(set f32:$XT, (load ixaddr:$src))]>; - def DFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrix:$src), - "#DFLOADf64", - [(set f64:$XT, (load ixaddr:$src))]>; - def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst), - "#DFSTOREf32", - [(store f32:$XT, ixaddr:$dst)]>; - def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst), - "#DFSTOREf64", - [(store f64:$XT, ixaddr:$dst)]>; - } + def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src), + "#DFLOADf32", + [(set f32:$XT, (load ixaddr:$src))]>; + def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src), + "#DFLOADf64", + [(set f64:$XT, (load ixaddr:$src))]>; + def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst), + "#DFSTOREf32", + [(store f32:$XT, ixaddr:$dst)]>; + def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst), + "#DFSTOREf64", + [(store f64:$XT, ixaddr:$dst)]>; + def : Pat<(f64 (extloadf32 ixaddr:$src)), (COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>; def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))), @@ -3533,22 +3565,20 @@ let AddedComplexity = 400 in { } let Predicates = [HasP9Vector] in { - let isPseudo = 1 in { - let mayStore = 1 in { - def SPILLTOVSR_STX : PseudoXFormMemOp<(outs), - (ins spilltovsrrc:$XT, memrr:$dst), - "#SPILLTOVSR_STX", []>; - def SPILLTOVSR_ST : Pseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst), - "#SPILLTOVSR_ST", []>; - } - let mayLoad = 1 in { - def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT), - (ins memrr:$src), - "#SPILLTOVSR_LDX", []>; - def SPILLTOVSR_LD : Pseudo<(outs spilltovsrrc:$XT), (ins memrix:$src), - "#SPILLTOVSR_LD", []>; + let mayStore = 1 in { + def SPILLTOVSR_STX : PseudoXFormMemOp<(outs), + 
(ins spilltovsrrc:$XT, memrr:$dst), + "#SPILLTOVSR_STX", []>; + def SPILLTOVSR_ST : PPCPostRAExpPseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst), + "#SPILLTOVSR_ST", []>; + } + let mayLoad = 1 in { + def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT), + (ins memrr:$src), + "#SPILLTOVSR_LDX", []>; + def SPILLTOVSR_LD : PPCPostRAExpPseudo<(outs spilltovsrrc:$XT), (ins memrix:$src), + "#SPILLTOVSR_LD", []>; - } } } // Integer extend helper dags 32 -> 64 @@ -3797,6 +3827,15 @@ let AddedComplexity = 400 in { (XFLOADf32 xoaddr:$A), VSFRC)), 0))>; } + let Predicates = [IsBigEndian, HasP8Vector] in { + def : Pat<DWToSPExtractConv.BVU, + (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3), + (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3)))>; + def : Pat<DWToSPExtractConv.BVS, + (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3), + (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>; + } + // Big endian, available on all targets with VSX let Predicates = [IsBigEndian, HasVSX] in { def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), @@ -3825,6 +3864,15 @@ let AddedComplexity = 400 in { (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>; } + let Predicates = [IsLittleEndian, HasP8Vector] in { + def : Pat<DWToSPExtractConv.BVU, + (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3), + (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3)))>; + def : Pat<DWToSPExtractConv.BVS, + (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3), + (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>; + } + let Predicates = [IsLittleEndian, HasVSX] in { // Little endian, available on all targets with VSX def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), @@ -3869,10 +3917,11 @@ let AddedComplexity = 400 in { (COPY_TO_REGCLASS (MTVSRD $A), VSRC), (COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), - (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), - (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), 0), - 
(XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), - (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), 0))>; + (XXPERMDI + (COPY_TO_REGCLASS + (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC), + (COPY_TO_REGCLASS + (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>; def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; } @@ -3884,10 +3933,11 @@ let AddedComplexity = 400 in { (COPY_TO_REGCLASS (MTVSRD $B), VSRC), (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), - (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), - (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), 0), - (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), - (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 0))>; + (XXPERMDI + (COPY_TO_REGCLASS + (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC), + (COPY_TO_REGCLASS + (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>; def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; } @@ -3940,10 +3990,9 @@ let AddedComplexity = 400 in { def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)), (v2i64 (MTVSRDD $rB, $rA))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), - (VMRGOW - (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.A, AnyExts.C), VSRC)), - (v4i32 - (COPY_TO_REGCLASS (MTVSRDD AnyExts.B, AnyExts.D), VSRC)))>; + (MTVSRDD + (RLDIMI AnyExts.B, AnyExts.A, 32, 0), + (RLDIMI AnyExts.D, AnyExts.C, 32, 0))>; } let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in { @@ -3953,10 +4002,9 @@ let AddedComplexity = 400 in { def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)), (v2i64 (MTVSRDD $rB, $rA))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), - (VMRGOW - (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC)), - (v4i32 - (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC)))>; + (MTVSRDD + (RLDIMI AnyExts.C, AnyExts.D, 32, 0), + (RLDIMI 
AnyExts.A, AnyExts.B, 32, 0))>; } // P9 Altivec instructions that can be used to build vectors. // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete @@ -4005,3 +4053,21 @@ let AddedComplexity = 400 in { } } +// Put this P9Altivec related definition here since it's possible to be +// selected to VSX instruction xvnegsp, avoid possible undef. +let Predicates = [HasP9Altivec] in { + + def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 0))), + (v4i32 (VABSDUW $A, $B))>; + + def : Pat<(v8i16 (PPCvabsd v8i16:$A, v8i16:$B, (i32 0))), + (v8i16 (VABSDUH $A, $B))>; + + def : Pat<(v16i8 (PPCvabsd v16i8:$A, v16i8:$B, (i32 0))), + (v16i8 (VABSDUB $A, $B))>; + + // As PPCVABSD description, the last operand indicates whether do the + // sign bit flip. + def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))), + (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>; +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td b/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td new file mode 100644 index 000000000000..d2a09f30c0f3 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td @@ -0,0 +1,19 @@ +//===-- PPCPfmCounters.td - PPC Hardware Counters ----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the available hardware counters for PPC. 
+// +//===----------------------------------------------------------------------===// + +def CpuCyclesPfmCounter : PfmCounter<"CYCLES">; + +def DefaultPfmCounters : ProcPfmCounters { + let CycleCounter = CpuCyclesPfmCounter; +} +def : PfmCountersDefaultBinding<DefaultPfmCounters>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp index 1892d1e3dc26..4458b92ceb5e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -33,6 +34,8 @@ STATISTIC(NumRRConvertedInPreEmit, "Number of r+r instructions converted to r+i in pre-emit peephole"); STATISTIC(NumRemovedInPreEmit, "Number of instructions deleted in pre-emit peephole"); +STATISTIC(NumberOfSelfCopies, + "Number of self copy instructions eliminated"); static cl::opt<bool> RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true), @@ -60,9 +63,32 @@ namespace { return false; bool Changed = false; const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); SmallVector<MachineInstr *, 4> InstrsToErase; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { + unsigned Opc = MI.getOpcode(); + // Detect self copies - these can result from running AADB. 
+ if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) { + const MCInstrDesc &MCID = TII->get(Opc); + if (MCID.getNumOperands() == 3 && + MI.getOperand(0).getReg() == MI.getOperand(1).getReg() && + MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) { + NumberOfSelfCopies++; + LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: "); + LLVM_DEBUG(MI.dump()); + InstrsToErase.push_back(&MI); + continue; + } + else if (MCID.getNumOperands() == 2 && + MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) { + NumberOfSelfCopies++; + LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: "); + LLVM_DEBUG(MI.dump()); + InstrsToErase.push_back(&MI); + continue; + } + } MachineInstr *DefMIToErase = nullptr; if (TII->convertToImmediateForm(MI, &DefMIToErase)) { Changed = true; @@ -74,6 +100,75 @@ namespace { } } } + + // Eliminate conditional branch based on a constant CR bit by + // CRSET or CRUNSET. We eliminate the conditional branch or + // convert it into an unconditional branch. Also, if the CR bit + // is not used by other instructions, we eliminate CRSET as well. + auto I = MBB.getFirstInstrTerminator(); + if (I == MBB.instr_end()) + continue; + MachineInstr *Br = &*I; + if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn) + continue; + MachineInstr *CRSetMI = nullptr; + unsigned CRBit = Br->getOperand(0).getReg(); + unsigned CRReg = getCRFromCRBit(CRBit); + bool SeenUse = false; + MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend(); + for (It++; It != Er; It++) { + if (It->modifiesRegister(CRBit, TRI)) { + if ((It->getOpcode() == PPC::CRUNSET || + It->getOpcode() == PPC::CRSET) && + It->getOperand(0).getReg() == CRBit) + CRSetMI = &*It; + break; + } + if (It->readsRegister(CRBit, TRI)) + SeenUse = true; + } + if (!CRSetMI) continue; + + unsigned CRSetOp = CRSetMI->getOpcode(); + if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) || + (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) { + // Remove this branch since it cannot be taken. 
+ InstrsToErase.push_back(Br); + MBB.removeSuccessor(Br->getOperand(1).getMBB()); + } + else { + // This conditional branch is always taken. So, remove all branches + // and insert an unconditional branch to the destination of this. + MachineBasicBlock::iterator It = Br, Er = MBB.end(); + for (; It != Er; It++) { + if (It->isDebugInstr()) continue; + assert(It->isTerminator() && "Non-terminator after a terminator"); + InstrsToErase.push_back(&*It); + } + if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) { + ArrayRef<MachineOperand> NoCond; + TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr, + NoCond, Br->getDebugLoc()); + } + for (auto &Succ : MBB.successors()) + if (Succ != Br->getOperand(1).getMBB()) { + MBB.removeSuccessor(Succ); + break; + } + } + + // If the CRBit is not used by another instruction, we can eliminate + // CRSET/CRUNSET instruction. + if (!SeenUse) { + // We need to check use of the CRBit in successors. + for (auto &SuccMBB : MBB.successors()) + if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) { + SeenUse = true; + break; + } + if (!SeenUse) + InstrsToErase.push_back(CRSetMI); + } } for (MachineInstr *MI : InstrsToErase) { LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: "); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 96923a97a82c..3d067aa8e621 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -673,12 +673,15 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); unsigned SrcReg = MI.getOperand(0).getReg(); - BuildMI(MBB, II, dl, TII.get(TargetOpcode::KILL), - getCRFromCRBit(SrcReg)) - .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); - + // We need to move the CR field that contains the CR bit we are spilling. 
+ // The super register may not be explicitly defined (i.e. it can be defined + // by a CR-logical that only defines the subreg) so we state that the CR + // field is undef. Also, in order to preserve the kill flag on the CR bit, + // we add it as an implicit use. BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg) - .addReg(getCRFromCRBit(SrcReg)); + .addReg(getCRFromCRBit(SrcReg), RegState::Undef) + .addReg(SrcReg, + RegState::Implicit | getKillRegState(MI.getOperand(0).isKill())); // If the saved register wasn't CR0LT, shift the bits left so that the bit to // store is the first one. Mask all but that bit. diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index 91a98ee4efc7..e93fe4ce3453 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -85,8 +85,6 @@ public: BitVector getReservedRegs(const MachineFunction &MF) const override; bool isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const override; - bool enableMultipleCopyHints() const override { return true; } - /// We require the register scavenger. bool requiresRegisterScavenging(const MachineFunction &MF) const override { return true; @@ -141,6 +139,23 @@ public: // Base pointer (stack realignment) support. unsigned getBaseRegister(const MachineFunction &MF) const; bool hasBasePointer(const MachineFunction &MF) const; + + /// stripRegisterPrefix - This method strips the character prefix from a + /// register name so that only the number is left. Used by for linux asm. 
+ static const char *stripRegisterPrefix(const char *RegName) { + switch (RegName[0]) { + case 'r': + case 'f': + case 'q': // for QPX + case 'v': + if (RegName[1] == 's') + return RegName + 2; + return RegName + 1; + case 'c': if (RegName[1] == 'r') return RegName + 2; + } + + return RegName; + } }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index 0e641cf9e00a..d0d29b6d2c7d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -85,6 +85,12 @@ class VSRL<FPR SubReg, string n> : PPCReg<n> { let SubRegIndices = [sub_64]; } +// VSXReg - One of the VSX registers in the range vs32-vs63 with numbering +// and encoding to match. +class VSXReg<bits<6> num, string n> : PPCReg<n> { + let HWEncoding{5-0} = num; +} + // CR - One of the 8 4-bit condition registers class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> { let HWEncoding{2-0} = num; @@ -148,7 +154,7 @@ foreach Index = 0-31 in { // Dummy VSX registers, this defines string: "vs32"-"vs63", and is only used for // asm printing. foreach Index = 32-63 in { - def VSX#Index : PPCReg<"vs"#Index>; + def VSX#Index : VSXReg<Index, "vs"#Index>; } // The reprsentation of r0 when treated as the constant 0. 
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td index 5ad0a517c117..c8fe7d7eea78 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td @@ -42,7 +42,6 @@ def IIC_LdStLoad : InstrItinClass; def IIC_LdStLoadUpd : InstrItinClass; def IIC_LdStLoadUpdX : InstrItinClass; def IIC_LdStStore : InstrItinClass; -def IIC_LdStStoreUpd : InstrItinClass; def IIC_LdStDSS : InstrItinClass; def IIC_LdStICBI : InstrItinClass; def IIC_LdStLD : InstrItinClass; @@ -63,8 +62,8 @@ def IIC_LdStSLBIA : InstrItinClass; def IIC_LdStSLBIE : InstrItinClass; def IIC_LdStSTD : InstrItinClass; def IIC_LdStSTDCX : InstrItinClass; -def IIC_LdStSTDU : InstrItinClass; -def IIC_LdStSTDUX : InstrItinClass; +def IIC_LdStSTU : InstrItinClass; +def IIC_LdStSTUX : InstrItinClass; def IIC_LdStSTFD : InstrItinClass; def IIC_LdStSTFDU : InstrItinClass; def IIC_LdStSTVEBX : InstrItinClass; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td index 2455e5e52de5..646822eedbe0 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td @@ -280,13 +280,6 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<2, [P440_LWB]>], [1, 1, 1], [NoBypass, P440_GPR_Bypass]>, - InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [P440_DISS1, P440_DISS2]>, - InstrStage<1, [P440_LRACC]>, - InstrStage<1, [P440_AGEN]>, - InstrStage<1, [P440_CRD]>, - InstrStage<2, [P440_LWB]>], - [2, 1, 1, 1], - [NoBypass, P440_GPR_Bypass]>, InstrItinData<IIC_LdStICBI, [InstrStage<1, [P440_DISS1, P440_DISS2]>, InstrStage<1, [P440_LRACC]>, InstrStage<1, [P440_AGEN]>, @@ -373,14 +366,14 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<2, [P440_LWB]>], [4, 1, 1], [NoBypass, P440_GPR_Bypass]>, - InstrItinData<IIC_LdStSTDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrItinData<IIC_LdStSTU, 
[InstrStage<1, [P440_DISS1, P440_DISS2]>, InstrStage<1, [P440_LRACC]>, InstrStage<1, [P440_AGEN]>, InstrStage<1, [P440_CRD]>, InstrStage<2, [P440_LWB]>], [2, 1, 1, 1], [NoBypass, P440_GPR_Bypass]>, - InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrItinData<IIC_LdStSTUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>, InstrStage<1, [P440_LRACC]>, InstrStage<1, [P440_AGEN]>, InstrStage<1, [P440_CRD]>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td index 54cfae5d74b7..f34c1accc0fd 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td @@ -81,8 +81,6 @@ def PPCA2Itineraries : ProcessorItineraries< [6, 0, 0]>, InstrItinData<IIC_LdStStore, [InstrStage<1, [A2_XU]>], [0, 0, 0]>, - InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [A2_XU]>], - [2, 0, 0, 0]>, InstrItinData<IIC_LdStICBI, [InstrStage<1, [A2_XU]>], [16, 0, 0]>, InstrItinData<IIC_LdStSTFD, [InstrStage<1, [A2_XU]>], @@ -105,9 +103,9 @@ def PPCA2Itineraries : ProcessorItineraries< [82, 0, 0]>, // L2 latency InstrItinData<IIC_LdStSTD, [InstrStage<1, [A2_XU]>], [0, 0, 0]>, - InstrItinData<IIC_LdStSTDU, [InstrStage<1, [A2_XU]>], + InstrItinData<IIC_LdStSTU, [InstrStage<1, [A2_XU]>], [2, 0, 0, 0]>, - InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [A2_XU]>], + InstrItinData<IIC_LdStSTUX, [InstrStage<1, [A2_XU]>], [2, 0, 0, 0]>, InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [A2_XU]>], [82, 0, 0]>, // L2 latency diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td index d7c2bd15a258..479a970b2537 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td @@ -144,7 +144,13 @@ def PPCE500Itineraries : ProcessorItineraries< InstrStage<1, [E500_LSU_0]>], [6, 1], // Latency = 3 [NoBypass, E500_GPR_Bypass]>, - InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, 
[E500_DIS0, E500_DIS1], 0>, + InstrItinData<IIC_LdStSTU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SU0, E500_SU1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStSTUX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, InstrStage<1, [E500_SU0, E500_SU1], 0>, InstrStage<1, [E500_LSU_0]>], [6, 1], // Latency = 3 diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td index 5f95f2a79f66..d8bda073833f 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td @@ -157,7 +157,13 @@ def PPCE500mcItineraries : ProcessorItineraries< InstrStage<1, [E500mc_LSU_0]>], [6, 1], // Latency = 3 [NoBypass, E500mc_GPR_Bypass]>, - InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrItinData<IIC_LdStSTU, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500mc_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStSTUX, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, InstrStage<1, [E500mc_LSU_0]>], [6, 1], // Latency = 3 diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td index 32f8e652dd56..3e50803955c4 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td @@ -206,12 +206,6 @@ def PPCE5500Itineraries : ProcessorItineraries< InstrStage<1, [E5500_LSU_0]>], [7, 2], // Latency = 3, Repeat rate = 1 [NoBypass, E5500_GPR_Bypass]>, - InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, - InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, - InstrStage<1, [E5500_LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - 
[NoBypass, E5500_GPR_Bypass], - 2>, // 2 micro-ops InstrItinData<IIC_LdStICBI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, InstrStage<1, [E5500_LSU_0]>], [7, 2], // Latency = 3, Repeat rate = 1 @@ -281,13 +275,13 @@ def PPCE5500Itineraries : ProcessorItineraries< InstrStage<1, [E5500_LSU_0]>], [7, 2], // Latency = 3, Repeat rate = 1 [NoBypass, E5500_GPR_Bypass]>, - InstrItinData<IIC_LdStSTDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrItinData<IIC_LdStSTU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, InstrStage<1, [E5500_LSU_0]>], [7, 2], // Latency = 3, Repeat rate = 1 [NoBypass, E5500_GPR_Bypass], 2>, // 2 micro-ops - InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrItinData<IIC_LdStSTUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, InstrStage<1, [E5500_LSU_0]>], [7, 2], // Latency = 3, Repeat rate = 1 diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td index 21efd8f8f6c9..0995b7200d93 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td @@ -43,7 +43,8 @@ def G3Itineraries : ProcessorItineraries< InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G3_SLU]>]>, InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G3_SLU]>]>, InstrItinData<IIC_LdStStore , [InstrStage<2, [G3_SLU]>]>, - InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStSTU , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStSTUX , [InstrStage<2, [G3_SLU]>]>, InstrItinData<IIC_LdStICBI , [InstrStage<3, [G3_SLU]>]>, InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G3_SLU]>]>, InstrItinData<IIC_LdStSTFDU , [InstrStage<2, [G3_SLU]>]>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td index 340773ef7876..1b15c7b3c7ad 100644 --- 
a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td @@ -48,7 +48,8 @@ def G4Itineraries : ProcessorItineraries< InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G4_SLU]>]>, InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G4_SLU]>]>, InstrItinData<IIC_LdStStore , [InstrStage<2, [G4_SLU]>]>, - InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStSTU , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStSTUX , [InstrStage<2, [G4_SLU]>]>, InstrItinData<IIC_LdStDSS , [InstrStage<2, [G4_SLU]>]>, InstrItinData<IIC_LdStICBI , [InstrStage<2, [G4_SLU]>]>, InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G4_SLU]>]>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td index 1d9f13fcb850..0044c3c6a449 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -56,7 +56,6 @@ def G4PlusItineraries : ProcessorItineraries< InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G4P_SLU]>]>, InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G4P_SLU]>]>, InstrItinData<IIC_LdStStore , [InstrStage<3, [G4P_SLU]>]>, - InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G4P_SLU]>]>, InstrItinData<IIC_LdStDSS , [InstrStage<3, [G4P_SLU]>]>, InstrItinData<IIC_LdStICBI , [InstrStage<3, [G4P_IU2]>]>, InstrItinData<IIC_LdStSTFD , [InstrStage<3, [G4P_SLU]>]>, @@ -73,8 +72,8 @@ def G4PlusItineraries : ProcessorItineraries< InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G4P_SLU]>]>, InstrItinData<IIC_LdStSTD , [InstrStage<3, [G4P_SLU]>]>, InstrItinData<IIC_LdStSTDCX , [InstrStage<3, [G4P_SLU]>]>, - InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G4P_SLU]>]>, - InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSTU , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSTUX , [InstrStage<3, [G4P_SLU]>]>, InstrItinData<IIC_LdStSTVEBX , [InstrStage<3, [G4P_SLU]>]>, 
InstrItinData<IIC_LdStSTWCX , [InstrStage<3, [G4P_SLU]>]>, InstrItinData<IIC_LdStSync , [InstrStage<35, [G4P_SLU]>]>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td index b5a9f96d45ae..c802b80170fb 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td @@ -54,7 +54,6 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G5_SLU]>]>, InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G5_SLU]>]>, InstrItinData<IIC_LdStStore , [InstrStage<3, [G5_SLU]>]>, - InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G5_SLU]>]>, InstrItinData<IIC_LdStDSS , [InstrStage<10, [G5_SLU]>]>, InstrItinData<IIC_LdStICBI , [InstrStage<40, [G5_SLU]>]>, InstrItinData<IIC_LdStSTFD , [InstrStage<4, [G5_SLU]>]>, @@ -76,8 +75,8 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<IIC_LdStSLBIA , [InstrStage<40, [G5_SLU]>]>, // needs work InstrItinData<IIC_LdStSLBIE , [InstrStage<2, [G5_SLU]>]>, InstrItinData<IIC_LdStSTD , [InstrStage<3, [G5_SLU]>]>, - InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G5_SLU]>]>, - InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStSTU , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStSTUX , [InstrStage<3, [G5_SLU]>]>, InstrItinData<IIC_LdStSTDCX , [InstrStage<11, [G5_SLU]>]>, InstrItinData<IIC_LdStSTVEBX , [InstrStage<5, [G5_SLU]>]>, InstrItinData<IIC_LdStSTWCX , [InstrStage<11, [G5_SLU]>]>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td index a8678f56900e..1d6e509819da 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td @@ -114,6 +114,10 @@ def P7Itineraries : ProcessorItineraries< P7_DU3, P7_DU4], 0>, InstrStage<1, [P7_FX1, P7_FX2]>], [4, 1, 1]>, + InstrItinData<IIC_IntMulHD , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + 
InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 1, 1]>, InstrItinData<IIC_IntMulLI , [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>, InstrStage<1, [P7_FX1, P7_FX2]>], @@ -126,6 +130,10 @@ def P7Itineraries : ProcessorItineraries< P7_DU3, P7_DU4], 0>, InstrStage<1, [P7_FX1, P7_FX2]>], [1, 1, 1]>, + InstrItinData<IIC_IntRotateDI , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, InstrItinData<IIC_IntShift , [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>, InstrStage<1, [P7_FX1, P7_FX2]>], @@ -253,13 +261,13 @@ def P7Itineraries : ProcessorItineraries< InstrStage<1, [P7_LS1, P7_LS2], 0>, InstrStage<1, [P7_FX1, P7_FX2]>], [1, 1, 1]>, - InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P7_DU1], 0>, + InstrItinData<IIC_LdStSTU , [InstrStage<1, [P7_DU1], 0>, InstrStage<1, [P7_DU2], 0>, InstrStage<1, [P7_LS1, P7_LS2], 0>, InstrStage<1, [P7_FX1, P7_FX2]>, InstrStage<1, [P7_FX1, P7_FX2]>], [2, 1, 1, 1]>, - InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P7_DU1], 0>, + InstrItinData<IIC_LdStSTUX , [InstrStage<1, [P7_DU1], 0>, InstrStage<1, [P7_DU2], 0>, InstrStage<1, [P7_DU3], 0>, InstrStage<1, [P7_DU4], 0>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td index 79963dd6a3e9..ff39dfda7016 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td @@ -90,6 +90,10 @@ def P8Itineraries : ProcessorItineraries< P8_DU4, P8_DU5, P8_DU6], 0>, InstrStage<1, [P8_FXU1, P8_FXU2]>], [4, 1, 1]>, + InstrItinData<IIC_IntMulHD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [4, 1, 1]>, InstrItinData<IIC_IntMulLI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6], 0>, InstrStage<1, [P8_FXU1, P8_FXU2]>], @@ -102,6 +106,10 @@ def P8Itineraries : ProcessorItineraries< P8_DU4, P8_DU5, P8_DU6], 0>, InstrStage<1, [P8_FXU1, P8_FXU2]>], [1, 1, 1]>, + 
InstrItinData<IIC_IntRotateDI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [1, 1, 1]>, InstrItinData<IIC_IntShift , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6], 0>, InstrStage<1, [P8_FXU1, P8_FXU2]>], @@ -259,14 +267,14 @@ def P8Itineraries : ProcessorItineraries< InstrStage<1, [P8_LU1, P8_LU2, P8_LSU1, P8_LSU2]>] [1, 1, 1]>, - InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P8_DU1], 0>, + InstrItinData<IIC_LdStSTU , [InstrStage<1, [P8_DU1], 0>, InstrStage<1, [P8_DU2], 0>, InstrStage<1, [P8_LU1, P8_LU2, P8_LSU1, P8_LSU2], 0>, InstrStage<1, [P8_FXU1, P8_FXU2]>], [2, 1, 1, 1]>, // First+last - InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P8_DU1], 0>, + InstrItinData<IIC_LdStSTUX , [InstrStage<1, [P8_DU1], 0>, InstrStage<1, [P8_DU2], 0>, InstrStage<1, [P8_DU3], 0>, InstrStage<1, [P8_DU4], 0>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td index e1a480117315..a1e625c855e0 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td @@ -33,6 +33,12 @@ def P9Model : SchedMachineModel { // A dispatch group is 6 instructions. let LoopMicroOpBufferSize = 60; + // As iops are dispatched to a slice, they are held in an independent slice + // issue queue until all register sources and other dependencies have been + // resolved and they can be issued. Each of four execution slices has an + // 11-entry iop issue queue. 
+ let MicroOpBufferSize = 44; + let CompleteModel = 1; // Do not support QPX (Quad Processing eXtension) or SPE (Signal Procesing diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index a8d7955ef548..580d057602f5 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -181,6 +181,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, const TargetOptions &Options) { + if (TT.isOSDarwin()) + report_fatal_error("Darwin is no longer supported for PowerPC"); + if (Options.MCOptions.getABIName().startswith("elfv1")) return PPCTargetMachine::PPC_ABI_ELFv1; else if (Options.MCOptions.getABIName().startswith("elfv2")) @@ -211,19 +214,24 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT, if (TT.isOSDarwin()) return Reloc::DynamicNoPIC; - // Non-darwin 64-bit platforms are PIC by default. - if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) + // Big Endian PPC is PIC by default. + if (TT.getArch() == Triple::ppc64) return Reloc::PIC_; - // 32-bit is static by default. + // Rest are static by default. 
return Reloc::Static; } -static CodeModel::Model getEffectiveCodeModel(const Triple &TT, - Optional<CodeModel::Model> CM, - bool JIT) { - if (CM) +static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT, + Optional<CodeModel::Model> CM, + bool JIT) { + if (CM) { + if (*CM == CodeModel::Tiny) + report_fatal_error("Target does not support the tiny CodeModel"); + if (*CM == CodeModel::Kernel) + report_fatal_error("Target does not support the kernel CodeModel"); return *CM; + } if (!TT.isOSDarwin() && !JIT && (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le)) return CodeModel::Medium; @@ -243,7 +251,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU, computeFSAdditions(FS, OL, TT), Options, getEffectiveRelocModel(TT, RM), - getEffectiveCodeModel(TT, CM, JIT), OL), + getEffectivePPCCodeModel(TT, CM, JIT), OL), TLOF(createTLOF(getTargetTriple())), TargetABI(computeTargetABI(TT, Options)) { initAsmInfo(); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index b0da9b5a6d70..bc9bcab83a0a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -473,7 +473,14 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment, - unsigned AddressSpace) { + unsigned AddressSpace, + bool UseMaskForCond, + bool UseMaskForGaps) { + if (UseMaskForCond || UseMaskForGaps) + return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace, + UseMaskForCond, UseMaskForGaps); + assert(isa<VectorType>(VecTy) && "Expect a vector type for interleaved memory op"); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 
2ee2b3eb8084..9221a910288a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -90,7 +90,9 @@ public: unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment, - unsigned AddressSpace); + unsigned AddressSpace, + bool UseMaskForCond = false, + bool UseMaskForGaps = false); /// @} }; |